/**
* Word comment injection with reply threading
*
* Flow:
* 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
* - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
* - Subsequent adjacent comments = replies (no markers, attach to parent)
* 2. Pandoc converts to DOCX
* 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
* - Replies go in comments.xml with parent reference in commentsExtended.xml
*/
import * as fs from 'fs';
import AdmZip from 'adm-zip';
import { escapeXml } from './utils.js';
// Delimiters of the temporary text markers that bracket a comment's anchor text.
// A start marker is MARKER_START_PREFIX + <comment index> + MARKER_SUFFIX and an
// end marker is MARKER_END_PREFIX + <comment index> + MARKER_SUFFIX.
const MARKER_START_PREFIX = '⟦CMS:';
const MARKER_END_PREFIX = '⟦CME:';
const MARKER_SUFFIX = '⟧';
/** One comment as matched in the markdown source, before reply detection. */
interface ParsedComment {
// Text before the first ':' of the comment body, or 'Unknown' when no usable prefix is found.
author: string;
// Comment body with the author prefix removed (the whole body when no author was parsed).
text: string;
// Text of the `[...]{.mark}` span that directly follows the comment, or null when there is none.
anchor: string | null;
// Offset in the original markdown where the match begins.
start: number;
// Offset just past the end of the match (includes the anchor span when one was matched).
end: number;
// The complete matched substring, comment plus optional anchor span.
fullMatch: string;
}
/** A parsed comment annotated with its position in a reply thread. */
interface PreparedComment extends ParsedComment {
// True for every comment in a cluster except the first one.
isReply: boolean;
// Index (into the prepared comments array) of the cluster's first comment; null for parents.
parentIdx: number | null;
// This comment's own index in the prepared comments array; also the number used in its markers.
commentIdx: number;
// Set on a parent whose anchor was taken over from one of its replies.
anchorFromReply?: boolean;
// Set on the reply that donated its anchor; that reply emits the parent's markers.
placesParentMarkers?: boolean;
}
/** Return value of prepareMarkdownWithMarkers(). */
interface PrepareResult {
// Markdown with parent comments replaced by markers and replies removed.
markedMarkdown: string;
// All comments (parents and replies) in document order.
comments: PreparedComment[];
}
/** A prepared comment plus the identifiers assigned to it during injection. */
interface CommentWithIds extends PreparedComment {
// 1-based comment id, stored as a string.
id: string;
// Paragraph id generated for the comment's first paragraph.
paraId: string;
// Paragraph id generated for the second paragraph (used for replies).
paraId2: string;
// Identifier produced by generateDurableId().
durableId: string;
// paraId of the parent comment; only assigned on replies whose parent exists.
parentParaId?: string;
}
/** Outcome reported by injectCommentsAtMarkers(). */
interface InjectionResult {
// False when the input was missing/invalid or an exception was caught.
success: boolean;
// Number of parent (non-reply) comments written to the output.
commentCount: number;
// Number of replies written to the output; only set on the full success path.
replyCount?: number;
// Number of input comments that were not written to the output.
skippedComments: number;
// Failure description; only set when success is false.
error?: string;
}
/**
 * Build the 8-character, uppercase, zero-padded hexadecimal paragraph ID for one
 * paragraph of a comment.
 *
 * The result depends only on the two arguments, so repeated calls with the same
 * inputs always yield the same ID.
 *
 * @param commentIdx - Index of the comment the paragraph belongs to.
 * @param paraNum - Number of the paragraph within that comment.
 * @returns The ID as a hex string such as "10001000".
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const BASE = 0x10000000;
  const COMMENT_STRIDE = 0x00100000;
  const PARAGRAPH_STRIDE = 0x00001000;

  const numericId = BASE + commentIdx * COMMENT_STRIDE + paraNum * PARAGRAPH_STRIDE;
  const hex = numericId.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
/**
 * Return the index at which a removal ending at `pos` should begin so that the
 * run of whitespace directly in front of `pos` is removed as well.
 *
 * @param text - The string being edited.
 * @param pos - Offset of the first character that is going to be removed.
 * @returns `pos` moved left past every immediately preceding whitespace character.
 */
function skipWhitespaceBackward(text: string, pos: number): number {
  let start = pos;
  // The preceding character must be re-read on every step. Testing a character
  // captured once before the loop would, whenever it is whitespace, walk all the
  // way to index 0 and delete everything in front of the comment.
  while (start > 0 && /\s/.test(text.charAt(start - 1))) {
    start--;
  }
  return start;
}

/**
 * Parse `{>>author: text<<}` comments (each optionally followed by an
 * `[anchor]{.mark}` span), group adjacent comments into reply threads, and
 * replace them with text markers.
 *
 * Threading rule: a comment that starts fewer than 10 characters after the end
 * of the previous comment joins that comment's cluster. The first comment of a
 * cluster is the parent; every later one is a reply to it.
 *
 * Marker rule: only parents get markers (`⟦CMS:n⟧anchor⟦CME:n⟧`, where `n` is the
 * parent's index in the returned array). Replies are removed from the text
 * together with the whitespace in front of them. When a parent has no anchor but
 * one of its replies does, the anchor is moved to the parent and the markers are
 * written where that reply stood.
 *
 * NOTE(review): a reply that carries its own anchor while its parent already has
 * one is removed including its `[anchor]{.mark}` span — confirm that dropping
 * that anchor text from the document is intended.
 *
 * @param markdown - Markdown source containing zero or more comments.
 * @returns The marked-up markdown and all comments (parents and replies) in
 *   document order. When no comment is found the input is returned unchanged
 *   with an empty comment list.
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Comment body is captured lazily; the optional second group is the anchor text.
  const commentPattern = /\{>>(.+?)<<\}(?:\s*\[([^\]]+)\]\{\.mark\})?/g;
  const rawMatches: ParsedComment[] = [];

  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // Treat "name: body" as an author prefix only when the colon appears early,
    // so a colon deep inside the comment text is not mistaken for one.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }
    rawMatches.push({
      author,
      text,
      anchor: match[2] || null,
      start: match.index,
      end: match.index + match[0].length,
      fullMatch: match[0],
    });
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Comments separated by fewer than this many characters belong to one cluster.
  const ADJACENT_THRESHOLD = 10;
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of the first comment in the current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    if (gap >= ADJACENT_THRESHOLD) {
      // Too far from the previous comment: start a new cluster.
      clusterParentIdx = -1;
    }

    const startsCluster = clusterParentIdx === -1;
    comments.push({
      ...m,
      isReply: !startsCluster,
      parentIdx: startsCluster ? null : clusterParentIdx,
      commentIdx: comments.length,
    });
    if (startsCluster) {
      clusterParentIdx = comments.length - 1;
    }
    lastCommentEnd = m.end;
  }

  // Move an anchor from a reply to its parent when the parent has none. The
  // donating reply remembers that it must emit the parent's markers.
  for (const c of comments) {
    if (!c.isReply || !c.anchor || c.parentIdx === null) continue;
    const parent = comments[c.parentIdx];
    if (parent && !parent.anchor) {
      parent.anchor = c.anchor;
      parent.anchorFromReply = true;
      c.placesParentMarkers = true;
      c.anchor = null;
    }
  }

  const wrapInMarkers = (idx: number, anchor: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  // Rewrite from the last comment to the first so that the recorded offsets of
  // the comments still to be processed stay valid.
  let markedMarkdown = markdown;
  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (!c.isReply && !c.anchorFromReply) {
      // Ordinary parent: swap the comment (and its anchor span) for markers in place.
      markedMarkdown =
        markedMarkdown.slice(0, c.start) + wrapInMarkers(i, c.anchor || '') + markedMarkdown.slice(c.end);
      continue;
    }

    // Replies, and parents whose markers are emitted by a reply, disappear from
    // the text together with the whitespace in front of them (avoids double spaces).
    const removeStart = skipWhitespaceBackward(markedMarkdown, c.start);

    let replacement = '';
    if (c.isReply && c.placesParentMarkers && c.parentIdx !== null) {
      // The anchor was moved to the parent, so recover its text from the raw match
      // and emit it wrapped in markers that carry the PARENT's index.
      const anchorMatch = c.fullMatch.match(/\[([^\]]+)\]\{\.mark\}$/);
      if (anchorMatch) {
        replacement = wrapInMarkers(c.parentIdx, anchorMatch[1] ?? '');
      }
    }

    markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
/**
 * Assemble the string that the caller stores as word/comments.xml.
 *
 * For every comment the escaped comment text is appended between fixed
 * fragments; replies receive one additional fragment (described below as a
 * second, empty paragraph).
 *
 * NOTE(review): in this view every fixed fragment is an empty string, and
 * `now` and `rsid` are computed but never referenced. Presumably the XML markup
 * (including the interpolations that used those values) was lost from the
 * literals — verify against the canonical file before changing this function.
 *
 * @param comments - Comments (parents and replies) to serialize, in output order.
 * @returns The concatenated document text.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
// Word expects date without milliseconds: 2025-12-30T08:33:00Z
const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');
let xml = '\n';
// Minimal namespaces matching golden file structure
xml += '';
// Use a consistent rsid (8-char hex) for all comments in this batch
const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');
for (const comment of comments) {
xml += ``;
// First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
xml += ``;
xml += ``;
// The comment text is the only caller-supplied data emitted here; it is XML-escaped.
xml += `${escapeXml(comment.text)}`;
xml += ``;
if (comment.isReply) {
// Second empty paragraph: rsidRDefault matches rsidR
xml += ``;
}
xml += ``;
}
xml += '';
return xml;
}
/**
 * Assemble the string that the caller stores as word/commentsExtended.xml, the
 * part that (per the comments below) links each reply to its parent.
 *
 * One fragment is appended per comment; replies with a known parentParaId take
 * a different branch from parents and orphaned replies.
 *
 * NOTE(review): both branches append an empty string in this view, so the
 * paraId / paraId2 / parentParaId values mentioned in the comments are not
 * visibly emitted. Presumably the markup was lost from the literals — verify
 * against the canonical file.
 *
 * @param comments - Comments (parents and replies) to describe.
 * @returns The concatenated document text.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
let xml = '\n';
// Minimal namespaces matching golden file structure
xml += '';
for (const comment of comments) {
if (comment.isReply && comment.parentParaId) {
// Reply: use paraId2 (the second/empty paragraph) and link to parent's paraId
xml += ``;
} else {
// Parent comment: use paraId (first paragraph)
xml += ``;
}
}
xml += '';
return xml;
}
/**
 * Produce an 8-character, uppercase, zero-padded hexadecimal durable ID.
 *
 * The value combines the current time with the comment index and is reduced
 * modulo 0x7FFFFFFF so it always fits in a positive signed 32-bit integer,
 * which is how Word is stated to interpret durable IDs.
 *
 * @param index - Position of the comment in the batch.
 * @returns The ID as a hex string; it varies with the clock between calls.
 */
function generateDurableId(index: number): string {
  const SIGNED_INT32_MAX = 0x7FFFFFFF;
  const PER_INDEX_STEP = 0x01000000;

  // Time-derived seed in the range [0x10000000, 0x50000000).
  const clockPart = Date.now() % 0x40000000;
  const seed = 0x10000000 + clockPart;

  // Wrap so the result never reaches the signed 32-bit maximum.
  const bounded = (seed + index * PER_INDEX_STEP) % SIGNED_INT32_MAX;
  return bounded.toString(16).toUpperCase().padStart(8, '0');
}
/**
 * Assemble the string that the caller stores as word/commentsIds.xml.
 *
 * One fragment is appended per comment. For each comment the paragraph id of
 * its last paragraph is selected: paraId2 for replies, paraId otherwise.
 *
 * NOTE(review): the per-comment fragment is an empty string in this view, so
 * `useParaId` is computed but not visibly emitted. Presumably the markup was
 * lost from the literals — verify against the canonical file.
 *
 * @param comments - Comments (parents and replies) to list.
 * @returns The concatenated document text.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
let xml = '\n';
// Minimal namespaces matching golden file structure
xml += '';
for (const comment of comments) {
// ONE entry per comment using the LAST paragraph's paraId:
// - Parent comments (1 paragraph): use paraId
// - Reply comments (2 paragraphs): use paraId2 (the second/empty paragraph)
const useParaId = comment.isReply ? comment.paraId2 : comment.paraId;
xml += ``;
}
xml += '';
return xml;
}
/**
 * Assemble the string that the caller stores as word/commentsExtensible.xml.
 *
 * One fragment is appended per comment; the current UTC time, formatted
 * without milliseconds, is computed once up front.
 *
 * NOTE(review): the per-comment fragment is an empty string in this view, so
 * neither `now` nor the comment's durableId is visibly emitted. Presumably the
 * markup was lost from the literals — verify against the canonical file.
 *
 * @param comments - Comments (parents and replies) to list.
 * @returns The concatenated document text.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
// ISO-8601 timestamp with the ".sss" millisecond part removed.
const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');
let xml = '\n';
// Minimal namespaces matching golden file structure
xml += '';
for (const comment of comments) {
// ONE entry per comment using the durableId
xml += ``;
}
xml += '';
return xml;
}
/**
 * Assemble the string that the caller stores as word/people.xml.
 *
 * Author names are de-duplicated (first occurrence order is kept) and a
 * deterministic user id is derived for each one via generateUserId(), so no
 * hardcoded personal data is needed.
 *
 * NOTE(review): the three per-author fragments are empty strings in this view,
 * so neither the author name nor `userId` is visibly emitted. Presumably the
 * markup was lost from the literals — verify against the canonical file, and
 * confirm the author name is XML-escaped where it is interpolated.
 *
 * @param comments - Comments whose authors should be listed.
 * @returns The concatenated document text.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
// Extract unique authors
const authors = [...new Set(comments.map(c => c.author))];
let xml = '\n';
xml += '';
for (const author of authors) {
const userId = generateUserId(author);
xml += ``;
xml += ``;
xml += ``;
}
xml += '';
return xml;
}
/**
 * Derive a deterministic 16-character hexadecimal identifier from an author name.
 *
 * The name is folded, one UTF-16 code unit at a time, into a 32-bit rolling
 * hash (hash * 31 + unit, wrapped to a signed 32-bit integer). The absolute
 * value of that hash is rendered in lowercase hex and left-padded with zeros.
 *
 * @param author - Author display name; may be empty.
 * @returns A 16-character lowercase hex string; identical names give identical IDs.
 */
function generateUserId(author: string): string {
  // split('') yields UTF-16 code units, matching index-based charCodeAt access.
  const digest = author
    .split('')
    .reduce((acc, unit) => (((acc << 5) - acc) + unit.charCodeAt(0)) | 0, 0);

  const hex = Math.abs(digest).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
/**
 * Inject comments at marker positions
 *
 * Reads the DOCX at `docxPath`, replaces the text markers left in
 * word/document.xml with comment range markup for each parent comment (replies
 * share their parent's range), writes the comment parts (comments.xml,
 * commentsExtended.xml, commentsIds.xml, commentsExtensible.xml, people.xml),
 * registers them in [Content_Types].xml and word/_rels/document.xml.rels, and
 * saves the archive to `outputPath`.
 *
 * A parent whose markers cannot be located is skipped; its replies are then
 * left out as well.
 *
 * Nothing is thrown to the caller: a missing input file, a missing
 * word/document.xml and any caught exception are all reported through
 * `success: false` plus `error`.
 *
 * NOTE(review): in this view several literals are empty or truncated and
 * `rStart`, `rEndPos` and `docTagMatch` are used without a visible declaration.
 * Presumably XML markup and generic type arguments were stripped from the text;
 * verify against the canonical file before editing any statement flagged below.
 *
 * @param docxPath - Path of the DOCX file that contains the markers.
 * @param comments - Prepared comments, indexed consistently with the markers.
 * @param outputPath - Path the resulting DOCX is written to.
 * @returns Counts of injected parents/replies and skipped comments, or an error result.
 */
export async function injectCommentsAtMarkers(
docxPath: string,
comments: PreparedComment[],
outputPath: string
): Promise {
try {
if (!fs.existsSync(docxPath)) {
return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
}
// Nothing to inject: the output is a plain copy of the input.
if (comments.length === 0) {
fs.copyFileSync(docxPath, outputPath);
return { success: true, commentCount: 0, skippedComments: 0 };
}
const zip = new AdmZip(docxPath);
const documentEntry = zip.getEntry('word/document.xml');
if (!documentEntry) {
return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
}
let documentXml = zip.readAsText(documentEntry);
// Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
...c,
id: String(idx + 1),
paraId: generateParaId(idx, 1), // First paragraph
paraId2: generateParaId(idx, 2), // Second paragraph
durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
}));
// Link replies to parent paraIds
for (const c of commentsWithIds) {
if (c.isReply && c.parentIdx !== null) {
const parent = commentsWithIds[c.parentIdx];
if (parent) {
c.parentParaId = parent.paraId;
}
}
}
// Ids of every comment (parent or reply) that actually received a range in the document.
const injectedIds = new Set();
// Process only parent comments (non-replies) for document ranges
const parentComments = commentsWithIds.filter(c => !c.isReply);
// Walk the parents in reverse order, last one first.
for (let i = parentComments.length - 1; i >= 0; i--) {
const comment = parentComments[i];
if (!comment) continue;
const idx = comment.commentIdx;
const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
const startPos = documentXml.indexOf(startMarker);
const endPos = documentXml.indexOf(endMarker);
// Markers missing from the document: this parent is skipped.
if (startPos === -1 || endPos === -1) continue;
// Find the run containing the markers
// NOTE(review): the search strings below are empty and `rStart` / `rEndPos` have no
// visible declaration — these statements look truncated; restore from the canonical file.
const rStartBefore = documentXml.lastIndexOf('', startPos);
const rStartOpen = documentXml.lastIndexOf('', endPos);
if (rStart === -1 || rEndPos === -1) continue;
const rEnd = rEndPos + ''.length;
const runContent = documentXml.slice(rStart, rEnd);
// Extract styling
const rPrMatch = runContent.match(/[\s\S]*?<\/w:rPr>/);
const rPr = rPrMatch ? rPrMatch[0] : '';
// Extract text
const textMatch = runContent.match(/]*>([\s\S]*?)<\/w:t>/);
if (!textMatch) continue;
const fullText = textMatch[1] ?? '';
const tElementMatch = textMatch[0].match(/]*>/);
if (!tElementMatch) continue;
const tElement = tElementMatch[0];
// Both markers must sit inside this one text element; otherwise the parent is skipped.
const startInText = fullText.indexOf(startMarker);
const endInText = fullText.indexOf(endMarker);
if (startInText === -1 || endInText === -1) continue;
let textBefore = fullText.slice(0, startInText);
let anchorText = fullText.slice(startInText + startMarker.length, endInText);
let textAfter = fullText.slice(endInText + endMarker.length);
// When anchor is empty, use the first word from textAfter as fallback
if (!anchorText && textAfter) {
const wordMatch = textAfter.match(/^\s*(\S+)/);
if (wordMatch) {
anchorText = wordMatch[1] ?? '';
textAfter = textAfter.slice(wordMatch[0].length);
}
}
// When anchor is still empty, normalize double spaces to single space
if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
textAfter = textAfter.slice(1); // Remove leading space from textAfter
}
// Build replacement
let replacement = '';
if (textBefore) {
replacement += `${rPr}${tElement}${textBefore}`;
}
// Find replies to this comment
const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
// Start ranges for parent AND all replies (nested)
replacement += ``;
for (const reply of replies) {
replacement += ``;
}
// Anchor text
if (anchorText) {
replacement += `${rPr}${tElement}${anchorText}`;
}
// End parent range and reference (NO rStyle wrapper - required for threading)
replacement += ``;
replacement += ``;
// End reply ranges and references (same position as parent, NO rStyle wrapper)
for (const reply of replies) {
replacement += ``;
replacement += ``;
injectedIds.add(reply.id);
}
if (textAfter) {
replacement += `${rPr}${tElement}${textAfter}`;
}
// Swap the original run (rStart..rEnd) for the rebuilt markup.
documentXml = documentXml.slice(0, rStart) + replacement + documentXml.slice(rEnd);
injectedIds.add(comment.id);
}
// Add required namespaces to document.xml for comment threading
const requiredNs: Record = {
'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
};
// NOTE(review): the statement that declares `docTagMatch` (presumably a match of the
// document root's opening tag) is not visible in this view — restore it from the canonical file.
if (docTagMatch) {
let docTag = docTagMatch[0];
let modified = false;
for (const [attr, val] of Object.entries(requiredNs)) {
// Append each missing namespace declaration just before the tag's closing '>'.
if (!docTag.includes(attr)) {
docTag = docTag.replace('>', ` ${attr}="${val}">`);
modified = true;
}
}
// Add mc:Ignorable if any namespace was added and the attribute is not present yet
if (modified && !docTag.includes('mc:Ignorable')) {
docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
}
documentXml = documentXml.replace(docTagMatch[0], docTag);
}
// Update document.xml
zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
// All comments (parents + replies) go in comments.xml
// But only include if parent was injected
const includedComments = commentsWithIds.filter(c => {
if (!c.isReply) {
return injectedIds.has(c.id);
} else {
// Include reply if its parent was injected
const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
return parent && injectedIds.has(parent.id);
}
});
// Each part below is replaced when the archive already has it, otherwise added.
// Create comments.xml
const commentsXml = createCommentsXml(includedComments);
if (zip.getEntry('word/comments.xml')) {
zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
} else {
zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
}
// Create commentsExtended.xml with reply threading
const commentsExtXml = createCommentsExtendedXml(includedComments);
if (zip.getEntry('word/commentsExtended.xml')) {
zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
} else {
zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
}
// Create commentsIds.xml (Word 2016+)
const commentsIdsXml = createCommentsIdsXml(includedComments);
if (zip.getEntry('word/commentsIds.xml')) {
zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
} else {
zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
}
// Create commentsExtensible.xml (Word 2018+)
const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
if (zip.getEntry('word/commentsExtensible.xml')) {
zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
} else {
zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
}
// Create people.xml (author definitions with Windows Live IDs)
const peopleXml = createPeopleXml(includedComments);
if (zip.getEntry('word/people.xml')) {
zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
} else {
zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
}
// Update [Content_Types].xml
// NOTE(review): every lastIndexOf('') below searches for an empty string and every inserted
// literal is just '\n' — presumably the closing tag and the entries to insert were
// stripped from this view; verify against the canonical file.
const contentTypesEntry = zip.getEntry('[Content_Types].xml');
if (contentTypesEntry) {
let contentTypes = zip.readAsText(contentTypesEntry);
// An entry is only inserted when the file does not already mention the part name.
if (!contentTypes.includes('comments.xml')) {
const insertPoint = contentTypes.lastIndexOf('');
contentTypes = contentTypes.slice(0, insertPoint) +
'\n' +
contentTypes.slice(insertPoint);
}
if (!contentTypes.includes('commentsExtended.xml')) {
const insertPoint = contentTypes.lastIndexOf('');
contentTypes = contentTypes.slice(0, insertPoint) +
'\n' +
contentTypes.slice(insertPoint);
}
if (!contentTypes.includes('commentsIds.xml')) {
const insertPoint = contentTypes.lastIndexOf('');
contentTypes = contentTypes.slice(0, insertPoint) +
'\n' +
contentTypes.slice(insertPoint);
}
if (!contentTypes.includes('commentsExtensible.xml')) {
const insertPoint = contentTypes.lastIndexOf('');
contentTypes = contentTypes.slice(0, insertPoint) +
'\n' +
contentTypes.slice(insertPoint);
}
if (!contentTypes.includes('people.xml')) {
const insertPoint = contentTypes.lastIndexOf('');
contentTypes = contentTypes.slice(0, insertPoint) +
'\n' +
contentTypes.slice(insertPoint);
}
zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
}
// Update relationships
const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
if (relsEntry) {
let rels = zip.readAsText(relsEntry);
// Highest numeric rId already present in the relationships file.
// NOTE(review): `maxId` is not referenced by any visible statement below — presumably
// the stripped relationship literals interpolated new ids derived from it; confirm.
const rIdMatches = rels.match(/rId(\d+)/g) || [];
const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
if (!rels.includes('comments.xml')) {
const insertPoint = rels.lastIndexOf('');
rels = rels.slice(0, insertPoint) +
`\n` +
rels.slice(insertPoint);
}
if (!rels.includes('commentsExtended.xml')) {
const insertPoint = rels.lastIndexOf('');
rels = rels.slice(0, insertPoint) +
`\n` +
rels.slice(insertPoint);
}
if (!rels.includes('commentsIds.xml')) {
const insertPoint = rels.lastIndexOf('');
rels = rels.slice(0, insertPoint) +
`\n` +
rels.slice(insertPoint);
}
if (!rels.includes('commentsExtensible.xml')) {
const insertPoint = rels.lastIndexOf('');
rels = rels.slice(0, insertPoint) +
`\n` +
rels.slice(insertPoint);
}
if (!rels.includes('people.xml')) {
const insertPoint = rels.lastIndexOf('');
rels = rels.slice(0, insertPoint) +
`\n` +
rels.slice(insertPoint);
}
zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
}
zip.writeZip(outputPath);
const parentCount = includedComments.filter(c => !c.isReply).length;
const replyCount = includedComments.filter(c => c.isReply).length;
return {
success: true,
commentCount: parentCount,
replyCount: replyCount,
// Everything that was passed in but did not end up in the comment parts.
skippedComments: comments.length - includedComments.length,
};
} catch (err: any) {
// Any exception is converted into a failure result rather than rethrown.
return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
}
}