import type { RichTextNode, RichTextRoot } from '@unito/integration-api';

const SENTINEL_OPENER = '【';
const SENTINEL_CLOSER = '】';

// Caller-supplied predicate: true when `node` can be kept as-is. `ancestors`
// is ordered nearest-first (index 0 is the direct parent, the root is last).
export type CanRender = (node: RichTextNode, ancestors: ReadonlyArray<RichTextNode>) => boolean;

// Renderable nodes of these types put their children in "block context":
// sentinel markers emitted there are wrapped in their own paragraph instead
// of being emitted as bare text nodes.
const BLOCK_PARENT_TYPES = new Set(['root', 'blockquote', 'listItem']);

// ---- Encoder ----

// Returns a new root in which every node rejected by `canRender` is replaced
// by a text fallback: a markdown table for `table`, sentinel markers for
// everything else. Text nodes are passed through untouched.
export function encodeSentinels(rootNode: RichTextRoot, canRender: CanRender): RichTextRoot {
  return {
    type: 'root',
    children: encodeChildren(rootNode.children, [rootNode], true, canRender),
  };
}

// Encodes one children list. `blockContext` tells the sentinel fallback
// whether markers must be wrapped in paragraphs (see BLOCK_PARENT_TYPES).
function encodeChildren(
  children: RichTextNode[],
  ancestors: ReadonlyArray<RichTextNode>,
  blockContext: boolean,
  canRender: CanRender,
): RichTextNode[] {
  const result: RichTextNode[] = [];
  for (const child of children) {
    if (child.type === 'text') {
      result.push(child);
      continue;
    }

    if (canRender(child, ancestors)) {
      if (child.children) {
        // Renderable container: keep it, but encode whatever it contains.
        const childBlockContext = BLOCK_PARENT_TYPES.has(child.type);
        result.push({
          ...child,
          children: encodeChildren(child.children, [child, ...ancestors], childBlockContext, canRender),
        });
      } else {
        result.push(child);
      }
      continue;
    }

    // Own-property lookup only: a node type such as `constructor` or
    // `toString` must not resolve to an inherited Object.prototype member.
    const encode = Object.prototype.hasOwnProperty.call(ENCODERS, child.type) ? ENCODERS[child.type] : undefined;
    if (encode) {
      result.push(...encode(child, canRender, ancestors));
      continue;
    }

    result.push(...encodeAsSentinels(child, ancestors, blockContext, canRender));
  }
  return result;
}

// Generic fallback: a childless node becomes one self-closing marker; any
// other node becomes `opener, ...encoded children, closer`. In block context
// each marker is wrapped in its own paragraph, otherwise it is a text node.
function encodeAsSentinels(
  node: RichTextNode,
  ancestors: ReadonlyArray<RichTextNode>,
  blockContext: boolean,
  canRender: CanRender,
): RichTextNode[] {
  if (!node.children || node.children.length === 0) {
    const marker = selfCloseMarker(node);
    return [blockContext ? wrapBlock(marker) : { type: 'text', value: marker }];
  }

  const opener = openMarker(node);
  const closer = closeMarker(node);
  const inner = encodeChildren(node.children, [node, ...ancestors], blockContext, canRender);

  if (blockContext) return [wrapBlock(opener), ...inner, wrapBlock(closer)];
  return [{ type: 'text', value: opener }, ...inner, { type: 'text', value: closer }];
}

type NodeEncoder = (node: RichTextNode, canRender: CanRender, ancestors: ReadonlyArray<RichTextNode>) => RichTextNode[];

// Type-specific fallbacks, tried before the generic sentinel encoding.
const ENCODERS: Record<string, NodeEncoder> = {
  table: encodeTableAsMarkdown,
};

// Renders a table as a single paragraph holding a pipe-delimited markdown
// table: the first row is the header, followed by a `---` separator row and
// the body rows, with `break` nodes between rows. A table without rows
// produces no output.
function encodeTableAsMarkdown(
  node: RichTextNode,
  canRender: CanRender,
  ancestors: ReadonlyArray<RichTextNode>,
): RichTextNode[] {
  const [firstRow, ...bodyRows] = node.children ?? [];
  if (!firstRow) return [];

  const cellSep: RichTextNode = { type: 'text', value: ' | ' };
  const rowSep: RichTextNode = { type: 'break' };

  const encodeRow = (row: RichTextNode): RichTextNode[] => {
    // Cell content is inline (blockContext = false) and escaped so that it
    // cannot introduce cell or row boundaries of its own.
    const cells = (row.children ?? []).map(cell =>
      escapeCellTextNodes(encodeChildren(cell.children ?? [], [cell, row, node, ...ancestors], false, canRender)),
    );
    const inner = cells.flatMap((cell, i) => (i === 0 ? cell : [cellSep, ...cell]));
    return [{ type: 'text', value: '| ' }, ...inner, { type: 'text', value: ' |' }];
  };

  const separator: RichTextNode = {
    type: 'text',
    value: `| ${(firstRow.children ?? []).map(() => '---').join(' | ')} |`,
  };

  const rows = [encodeRow(firstRow), [separator], ...bodyRows.map(encodeRow)];
  const children = mergeAdjacentText(rows.flatMap((row, i) => (i === 0 ? row : [rowSep, ...row])));
  return [{ type: 'paragraph', children }];
}

// Makes cell content safe for a one-line table row, at any nesting depth:
// text values are escaped, and `break` nodes are flattened to a single space.
// Rows are delimited by `break` nodes, so a break left inside a cell would be
// read back as a row boundary and the table would no longer parse.
function escapeCellTextNodes(nodes: RichTextNode[]): RichTextNode[] {
  return nodes.map(node => {
    if (node.type === 'break') {
      return { type: 'text', value: ' ' };
    }
    if (node.type === 'text' && node.value !== undefined) {
      return { ...node, value: escapeMarkdownCell(node.value) };
    }
    if (node.children) {
      return { ...node, children: escapeCellTextNodes(node.children) };
    }
    return node;
  });
}

// Collapses runs of adjacent text nodes into one text node (top level only).
function mergeAdjacentText(nodes: RichTextNode[]): RichTextNode[] {
  const result: RichTextNode[] = [];
  for (const node of nodes) {
    const last = result[result.length - 1];
    if (last?.type === 'text' && node.type === 'text') {
      result[result.length - 1] = { type: 'text', value: (last.value ?? '') + (node.value ?? '') };
    } else {
      result.push(node);
    }
  }
  return result;
}

// Escapes `\` and `|` with a backslash and replaces line breaks with a space.
// Backslashes are escaped first so the ones added for pipes are not doubled.
function escapeMarkdownCell(value: string): string {
  return value.replace(/\\/g, '\\\\').replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
}

// Wraps marker text in its own paragraph for use in block context.
function wrapBlock(value: string): RichTextNode {
  return { type: 'paragraph', children: [{ type: 'text', value }] };
}

// `【 type attrs 】`
function openMarker(node: RichTextNode): string {
  return `${SENTINEL_OPENER} ${node.type}${serializeAttrs(node.data)} ${SENTINEL_CLOSER}`;
}

// `【 end type 】`
function closeMarker(node: RichTextNode): string {
  return `${SENTINEL_OPENER} end ${node.type} ${SENTINEL_CLOSER}`;
}

// `【 type attrs / 】`
function selfCloseMarker(node: RichTextNode): string {
  return `${SENTINEL_OPENER} ${node.type}${serializeAttrs(node.data)} / ${SENTINEL_CLOSER}`;
}

// Serializes `data` as ` key=value key=value` (with a leading space), or ''
// when there is nothing to emit. Entries whose value `serializeValue`
// rejects (returns null for) are skipped.
function serializeAttrs(data: Record<string, unknown> | undefined): string {
  if (!data) return '';
  const parts: string[] = [];
  for (const [key, value] of Object.entries(data)) {
    const serialized = serializeValue(value);
    if (serialized !== null) parts.push(`${key}=${serialized}`);
  }
  return parts.length === 0 ? '' : ' ' + parts.join(' ');
}

// Emits the value in JSON-literal form so the decoder can round-trip
// types: strings as `"foo"`, booleans/numbers/bigints as `true` / `42` /
// `9007199254740992`.
// Returns null when the value can't survive the wire format: non-primitive
// values, string content with `"` / `】` / control chars (they would break
// the regex-based decoder), and numbers that have no plain-decimal literal
// (NaN, ±Infinity, exponent notation such as `1e+21`). The encoder drops the
// entry and keeps going — the rest of the node still round-trips.
//
// Accepted strings are emitted through JSON.stringify so that backslashes
// are escaped: the decoder reads the literal back with JSON.parse, and a raw
// `\` would either be parsed as an escape sequence or make the parse fail.
const CONTROL_CHAR_REGEX = /[\u0000-\u001f]/;
const PLAIN_DECIMAL_REGEX = /^-?\d+(?:\.\d+)?$/;

function serializeValue(value: unknown): string | null {
  if (typeof value === 'string') {
    if (value.includes('"') || value.includes(SENTINEL_CLOSER) || CONTROL_CHAR_REGEX.test(value)) return null;
    return JSON.stringify(value);
  }
  if (typeof value === 'number') {
    if (!Number.isFinite(value)) return null;
    const literal = String(value);
    return PLAIN_DECIMAL_REGEX.test(literal) ? literal : null;
  }
  if (typeof value === 'boolean' || typeof value === 'bigint') {
    return String(value);
  }
  return null;
}

// ---- Decoder ----
//
// Reconstructing nodes from sentinel markers happens in two passes per parent,
// after children are recursively decoded so inner sentinels resolve first:
//
// 1. expandPaired — same-parent pairing. The opener and closer markers live
//    in the same children list (typical for inline content like a `strong`
//    sentinel that sits next to text inside a paragraph).
//
// 2. expandCrossSibling — cross-block pairing. The opener landed in one
//    block sibling's text, the closer in a later one. Produced when an
//    unsupported block-level node was encoded with each marker wrapped in
//    its own paragraph.
//
// A producer's parser may split a single opener mid-tag across adjacent text
// nodes; mergeFragmentedTags stitches them before the regex runs.

const ESCAPED_OPENER = escapeRegex(SENTINEL_OPENER);
const ESCAPED_CLOSER = escapeRegex(SENTINEL_CLOSER);

// Matches a single opener marker. Group 1 = tag, group 2 = attrs.
const OPENER_REGEX = new RegExp(`${ESCAPED_OPENER}\\s*(\\w+)([^${SENTINEL_CLOSER}]*)${ESCAPED_CLOSER}`);

// Matches any marker (opener or closer). Group 1 = 'end ' for closers
// (undefined for openers), group 2 = tag. Shared `g` regex is safe here:
// scanMarkers iterates it via matchAll, which works on a clone and never
// touches this instance's lastIndex.
const MARKER_REGEX = new RegExp(`${ESCAPED_OPENER}\\s*(end\\s+)?(\\w+)[^${SENTINEL_CLOSER}]*${ESCAPED_CLOSER}`, 'g');

// Escapes every regex metacharacter in `value` so it matches literally.
function escapeRegex(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// Builds a regex matching the closer marker `【 end <rawTag> 】`.
function makeCloserRegex(rawTag: string): RegExp {
  return new RegExp(`${ESCAPED_OPENER}\\s*end\\s+${escapeRegex(rawTag)}\\s*${ESCAPED_CLOSER}`);
}

// Inverse of the encoder: first re-parses markdown tables back into `table`
// nodes, then — only if any sentinel opener character is left in the tree —
// rebuilds nodes from sentinel markers.
export function decodeSentinels(rootNode: RichTextRoot): RichTextRoot {
  const tablesDecoded = decodeMarkdownTables(rootNode);
  if (!subtreeHasMarkers(tablesDecoded.children)) return tablesDecoded;
  const recursed = tablesDecoded.children.map(decodeNode);
  return { type: 'root', children: expandCrossSibling(expandInChildren(recursed)) };
}

// Returns a new root whose markdown-table paragraphs are replaced by tables.
function decodeMarkdownTables(rootNode: RichTextRoot): RichTextRoot {
  return { type: 'root', children: reparseChildrenForTables(rootNode.children) };
}

// Containers searched recursively for markdown tables.
const RECURSE_INTO = new Set(['blockquote', 'listOrdered', 'listUnordered', 'listItem']);

// Scans one children list for markdown tables. A table is recognised either
// as the whole content of a paragraph, or as a maximal run of consecutive
// inline siblings; anything that does not parse as a table is kept as-is.
function reparseChildrenForTables(children: RichTextNode[]): RichTextNode[] {
  const recursed = children.map(child =>
    child.children && RECURSE_INTO.has(child.type)
      ? { ...child, children: reparseChildrenForTables(child.children) }
      : child,
  );

  const result: RichTextNode[] = [];
  let i = 0;
  while (i < recursed.length) {
    const child = recursed[i]!;

    if (child.type === 'paragraph' && child.children) {
      const table = tryParseTableFromInline(child.children);
      if (table) {
        result.push(table);
        i++;
        continue;
      }
    }

    if (isInline(child)) {
      // Extend the run to every following inline sibling.
      let end = i;
      while (end < recursed.length && isInline(recursed[end]!)) end++;
      const table = tryParseTableFromInline(recursed.slice(i, end));
      if (table) {
        result.push(table);
        i = end;
        continue;
      }
    }

    result.push(child);
    i++;
  }
  return result;
}

const BLOCK_TYPES = new Set([
  'paragraph',
  'heading1',
  'heading2',
  'heading3',
  'heading4',
  'heading5',
  'heading6',
  'blockquote',
  'listOrdered',
  'listUnordered',
  'listItem',
  'code',
  'table',
  'tableRow',
  'tableCell',
  'thematicBreak',
]);

// Any type not listed in BLOCK_TYPES is treated as inline.
function isInline(node: RichTextNode): boolean {
  return !BLOCK_TYPES.has(node.type);
}

// Parses inline content as a markdown table: a header row, a separator row
// made only of `---` cells with the same cell count, then zero or more body
// rows with that same cell count. Returns null when the content does not
// have exactly this shape.
function tryParseTableFromInline(inline: RichTextNode[]): RichTextNode | null {
  if (inline.length === 0) return null;

  const rows = splitIntoRows(inline);
  // A trailing newline or `break` after the last row leaves blank rows at the
  // end; drop them so they do not disqualify an otherwise valid table.
  while (rows.length > 0 && !hasNonWhitespace(rows[rows.length - 1]!)) rows.pop();
  if (rows.length < 2) return null;

  const headerCells = splitRowAtPipes(rows[0]!);
  const separatorCells = splitRowAtPipes(rows[1]!);
  if (!headerCells || !separatorCells) return null;
  if (separatorCells.length !== headerCells.length) return null;
  if (!separatorCells.every(isSeparatorCell)) return null;

  const bodyCells: RichTextNode[][][] = [];
  for (const row of rows.slice(2)) {
    const cells = splitRowAtPipes(row);
    if (!cells || cells.length !== headerCells.length) return null;
    bodyCells.push(cells);
  }

  return {
    type: 'table',
    children: [buildTableRow(headerCells, true), ...bodyCells.map(cells => buildTableRow(cells, false))],
  };
}

// Splits inline content into rows at `break` nodes and at `\n` characters
// inside text nodes. Empty text segments are not emitted.
function splitIntoRows(inline: RichTextNode[]): RichTextNode[][] {
  const rows: RichTextNode[][] = [[]];
  const push = (n: RichTextNode) => rows[rows.length - 1]!.push(n);

  for (const child of inline) {
    if (child.type === 'break') {
      rows.push([]);
      continue;
    }
    if (child.type === 'text' && child.value?.includes('\n')) {
      const [head, ...tail] = child.value.split('\n');
      if (head) push({ type: 'text', value: head });
      for (const seg of tail) {
        rows.push([]);
        if (seg) push({ type: 'text', value: seg });
      }
      continue;
    }
    push(child);
  }
  return rows;
}

// True for a cell holding exactly one text node whose value is `---`.
function isSeparatorCell(cell: RichTextNode[]): boolean {
  if (cell.length !== 1) return false;
  const node = cell[0];
  return node?.type === 'text' && node.value === '---';
}

// Splits one row into cells at unescaped `|` characters found in text nodes.
// Returns null unless the row starts and ends with a pipe, ignoring
// surrounding whitespace. Non-text nodes always belong to the current cell.
function splitRowAtPipes(row: RichTextNode[]): RichTextNode[][] | null {
  const cells: RichTextNode[][] = [];
  let current: RichTextNode[] = [];
  let started = false;

  for (const node of row) {
    if (node.type !== 'text' || node.value === undefined) {
      current.push(node);
      continue;
    }
    const segments = splitOnUnescapedPipe(node.value);
    for (let i = 0; i < segments.length; i++) {
      // Every segment after the first one is preceded by a pipe.
      if (i > 0) {
        if (!started) {
          // Leading pipe: nothing but whitespace may come before it.
          if (hasNonWhitespace(current)) return null;
          started = true;
        } else {
          cells.push(current);
        }
        current = [];
      }
      if (segments[i] !== '') current.push({ type: 'text', value: segments[i]! });
    }
  }

  // Content left after the last pipe means the row has no trailing pipe.
  if (!started || hasNonWhitespace(current)) return null;
  return cells.map(trimCellContent);
}

// True when `nodes` holds any non-text node or any non-blank text.
function hasNonWhitespace(nodes: RichTextNode[]): boolean {
  return nodes.some(n => n.type !== 'text' || (n.value !== undefined && n.value.trim() !== ''));
}

// Splits `text` at every `|` that is not preceded by an escaping backslash.
// A backslash and the character after it are copied through as a pair, so
// escape sequences stay intact for unescapeCellText.
function splitOnUnescapedPipe(text: string): string[] {
  const segments: string[] = [];
  let current = '';
  for (let i = 0; i < text.length; i++) {
    const c = text[i];
    if (c === '\\' && i + 1 < text.length) {
      current += c + text[i + 1];
      i++;
      continue;
    }
    if (c === '|') {
      segments.push(current);
      current = '';
      continue;
    }
    current += c;
  }
  segments.push(current);
  return segments;
}

// Unescapes the text nodes of a cell, then strips leading whitespace from a
// leading text node and trailing whitespace from a trailing text node,
// removing either node if nothing is left of it.
function trimCellContent(cell: RichTextNode[]): RichTextNode[] {
  const result = cell.map(node =>
    node.type === 'text' && node.value !== undefined ? { ...node, value: unescapeCellText(node.value) } : node,
  );

  const first = result[0];
  if (first?.type === 'text' && first.value !== undefined) {
    const trimmed = first.value.replace(/^\s+/, '');
    if (trimmed === '') result.shift();
    else result[0] = { ...first, value: trimmed };
  }

  const last = result[result.length - 1];
  if (last?.type === 'text' && last.value !== undefined) {
    const trimmed = last.value.replace(/\s+$/, '');
    if (trimmed === '') result.pop();
    else result[result.length - 1] = { ...last, value: trimmed };
  }
  return result;
}

// Turns `\|` back into `|` and `\\` back into `\`.
function unescapeCellText(value: string): string {
  return value.replace(/\\\|/g, '|').replace(/\\\\/g, '\\');
}

// Builds a `tableRow`; header cells carry `data: { header: true }`.
function buildTableRow(cells: RichTextNode[][], header: boolean): RichTextNode {
  return {
    type: 'tableRow',
    children: cells.map(content => ({
      type: 'tableCell',
      ...(header ? { data: { header: true } } : {}),
      children: content,
    })),
  };
}

// Decodes sentinel markers inside `node`, depth-first: children are decoded
// before pairing runs on the node's own children list. Subtrees without any
// sentinel opener character are returned untouched.
function decodeNode(node: RichTextNode): RichTextNode {
  if (!node.children || !subtreeHasMarkers(node.children)) return node;
  const recursed = node.children.map(decodeNode);
  const expanded = expandCrossSibling(expandInChildren(recursed));
  return { ...node, children: expanded };
}

// True when any text node in the subtree contains the sentinel opener.
function subtreeHasMarkers(children: RichTextNode[]): boolean {
  for (const child of children) {
    if (child.type === 'text' && child.value?.includes(SENTINEL_OPENER)) return true;
    if (child.children && subtreeHasMarkers(child.children)) return true;
  }
  return false;
}

// Same-parent pairing, skipped when no direct text child holds an opener.
function expandInChildren(children: RichTextNode[]): RichTextNode[] {
  if (!children.some(c => c.type === 'text' && c.value?.includes(SENTINEL_OPENER))) return children;
  return expandPaired(mergeFragmentedTags(children));
}

// If a producer's parser split a single opener across adjacent text nodes,
// stitch them back so the regex can match the marker as a whole.
// A text node is "unclosed" when its last sentinel opener character comes
// after its last sentinel closer character. Such a node is concatenated with
// the text nodes that follow it for as long as the combined text is still
// unclosed, so a marker fragmented over any number of text nodes is
// restored. Merging stops at the first sibling that is not a non-empty text
// node.
function mergeFragmentedTags(children: RichTextNode[]): RichTextNode[] {
  const hasUnclosedOpener = (text: string): boolean =>
    text.lastIndexOf(SENTINEL_OPENER) > text.lastIndexOf(SENTINEL_CLOSER);

  const result: RichTextNode[] = [];
  let i = 0;
  while (i < children.length) {
    const child = children[i];
    i++;
    if (!child) continue;

    if (child.type !== 'text' || !child.value || !hasUnclosedOpener(child.value)) {
      result.push(child);
      continue;
    }

    let merged = child.value;
    let absorbed = false;
    while (hasUnclosedOpener(merged)) {
      const next = children[i];
      if (next?.type !== 'text' || !next.value) break;
      merged += next.value;
      absorbed = true;
      i++;
    }
    // Keep the original node object when nothing was stitched onto it.
    result.push(absorbed ? { type: 'text', value: merged } : child);
  }
  return result;
}

// Walk children left-to-right. For each text node, look for an opener; if
// found, locate its matching closer (possibly in a later sibling text) and
// reconstruct the node from the content between them. Self-closing markers
// (`【 type … / 】`) emit a node with no children directly, no closer search.
//
// A head that cannot be expanded — not a text node, no marker in it, first
// marker is a closer, or no matching closer exists — is emitted unchanged.
function expandPaired(children: RichTextNode[]): RichTextNode[] {
  const result: RichTextNode[] = [];
  let remaining: RichTextNode[] = children;

  while (remaining.length > 0) {
    const head = remaining[0]!;
    const headValue = head.type === 'text' ? head.value : undefined;
    const openerMatch = headValue ? OPENER_REGEX.exec(headValue) : null;
    const openerTag = openerMatch?.[1];

    if (!openerMatch || !openerTag || openerTag === 'end' || !headValue) {
      result.push(head);
      remaining = remaining.slice(1);
      continue;
    }

    const attrsRaw = openerMatch[2] ?? '';
    const beforeOpener = headValue.substring(0, openerMatch.index);
    const afterOpener = headValue.substring(openerMatch.index + openerMatch[0].length);

    // Self-closing marker — `attrs` ends with whitespace + `/`. Emit the
    // node with empty children, skip the closer search.
    if (SELF_CLOSE_REGEX.test(attrsRaw)) {
      if (beforeOpener) result.push({ type: 'text', value: beforeOpener });
      const { tagName, attrs } = normalizeLegacy(openerTag, attrsRaw.replace(SELF_CLOSE_REGEX, ''));
      result.push(buildNode(tagName, attrs, []));
      // Text after the marker is re-queued so later markers in it are seen.
      remaining = afterOpener
        ? [{ type: 'text', value: afterOpener }, ...remaining.slice(1)]
        : remaining.slice(1);
      continue;
    }

    const searchSpace: RichTextNode[] = afterOpener
      ? [{ type: 'text', value: afterOpener }, ...remaining.slice(1)]
      : remaining.slice(1);
    const closed = findMatchingCloserInline(searchSpace, openerTag);
    if (!closed) {
      result.push(head);
      remaining = remaining.slice(1);
      continue;
    }

    if (beforeOpener) result.push({ type: 'text', value: beforeOpener });
    const { tagName, attrs } = normalizeLegacy(openerTag, attrsRaw);
    // decodeNode resolves markers nested between the opener and the closer.
    result.push(decodeNode(buildNode(tagName, attrs, closed.before)));
    remaining = closed.after;
  }
  return result;
}

// `/\s*$` — `/` at the very end of the attrs region (with optional
// trailing whitespace). String attribute values are always quoted by
// `serializeValue`, so the closing `"` separates any `/` inside the
// value from the trailing slash; numeric/boolean values can't end in
// `/`. A `/` at the tail of `attrsRaw` is unambiguously a self-close
// marker.
const SELF_CLOSE_REGEX = /\/\s*$/;

// For each child, look for an opener in its text descendants; if found,
// search later siblings for a matching closer and reconstruct the node from
// everything in between.
// Cross-block pairing. When a child holds an opener whose closer sits in a
// later sibling, the two siblings and everything between them are replaced
// by: the nodes that preceded the opener, the reconstructed node, and the
// nodes that followed the closer. Children without an opener, or whose
// opener has no balanced closer in a later sibling, are kept unchanged.
function expandCrossSibling(children: RichTextNode[]): RichTextNode[] {
  const result: RichTextNode[] = [];
  let i = 0;
  while (i < children.length) {
    const current = children[i];
    if (!current) {
      i++;
      continue;
    }

    const opener = findOpenerInBlock(current);
    if (!opener) {
      result.push(current);
      i++;
      continue;
    }

    const closer = findMatchingCloserAcrossSiblings(children, i + 1, opener.rawTag);
    if (!closer) {
      result.push(current);
      i++;
      continue;
    }

    // Content = rest of the opener's block + whole siblings in between +
    // the part of the closer's block that precedes the closer.
    const contentNodes: RichTextNode[] = [...opener.afterOpenerNodes];
    for (let k = i + 1; k < closer.matchedAt; k++) {
      const mid = children[k];
      if (mid) contentNodes.push(mid);
    }
    contentNodes.push(...closer.before);

    result.push(...opener.beforeNodes);
    result.push(decodeNode(buildNode(opener.tagName, opener.attrs, contentNodes)));
    result.push(...closer.after);
    i = closer.matchedAt + 1;
  }
  return result;
}

// Location of one marker: `start`/`end` are offsets into the value of the
// text node at `nodeIndex`; `tag` is the raw tag as written in the marker.
interface MarkerHit {
  nodeIndex: number;
  start: number;
  end: number;
  tag: string;
  isCloser: boolean;
}

// Yields every paired-marker candidate (opener or closer) found in the
// direct text nodes of `nodes`, in document order. Self-closing markers
// (`【 type … / 】`) are skipped: they never have a closer of their own, so
// reporting them as openers would unbalance the depth count of an enclosing
// pair with the same tag.
function* scanMarkers(nodes: RichTextNode[]): Generator<MarkerHit> {
  for (const [i, node] of nodes.entries()) {
    if (node.type !== 'text' || !node.value) continue;
    for (const match of node.value.matchAll(MARKER_REGEX)) {
      const isCloser = match[1] !== undefined;
      // Everything up to the closing bracket; a trailing `/` there can only
      // belong to the attrs region because tags are `\w+`.
      const beforeBracket = match[0].slice(0, -SENTINEL_CLOSER.length);
      if (!isCloser && SELF_CLOSE_REGEX.test(beforeBracket)) continue;
      yield {
        nodeIndex: i,
        start: match.index,
        end: match.index + match[0].length,
        tag: match[2]!,
        isCloser,
      };
    }
  }
}

// Runs scanMarkers over the direct children of each sibling from `startIdx`
// on, tagging every hit with the sibling it came from and that sibling's
// children list. Siblings without children are skipped.
function* scanMarkersAcrossSiblings(
  siblings: RichTextNode[],
  startIdx: number,
): Generator<MarkerHit & { siblingIndex: number; children: RichTextNode[] }> {
  for (let j = startIdx; j < siblings.length; j++) {
    const sibling = siblings[j];
    if (!sibling?.children) continue;
    const children = sibling.children;
    for (const hit of scanMarkers(children)) {
      yield { ...hit, siblingIndex: j, children };
    }
  }
}

// Walk `hits` left-to-right for the closer that pairs with an already-opened
// outer marker for `tag`. Counts nested openers (depth++) and closers
// (depth--) of the same tag so that `【 foo 】 ... 【 foo 】 ... 【 end foo 】 ... 【 end foo 】`
// pairs the outer opener with the second closer, not the first. Returns null
// when no balanced closer exists.
function findBalancedCloser<Hit extends MarkerHit>(hits: Iterable<Hit>, tag: string): Hit | null {
  // The outer opener has already been consumed by the caller.
  let depth = 1;
  for (const hit of hits) {
    if (hit.tag !== tag) continue;
    depth += hit.isCloser ? -1 : 1;
    if (depth === 0) return hit;
  }
  return null;
}

// Finds the balanced closer for `tag` among the direct text nodes of
// `children` and returns the content on either side of it, or null.
function findMatchingCloserInline(
  children: RichTextNode[],
  tag: string,
): { before: RichTextNode[]; after: RichTextNode[] } | null {
  const hit = findBalancedCloser(scanMarkers(children), tag);
  return hit ? splitTextAt(children, hit) : null;
}

// Like findMatchingCloserInline, but walks block-level siblings starting at
// `startIdx` and descends one level into each sibling's `children` to find
// text nodes. Used when an outer marker's opener and closer land in different
// block siblings.
function findMatchingCloserAcrossSiblings(
  siblings: RichTextNode[],
  startIdx: number,
  tag: string,
): { matchedAt: number; before: RichTextNode[]; after: RichTextNode[] } | null {
  const hit = findBalancedCloser(scanMarkersAcrossSiblings(siblings, startIdx), tag);
  if (!hit) return null;

  const { before, after } = splitTextAt(hit.children, hit);
  // Drop a leading empty/whitespace text node left over after the closer.
  const first = after[0];
  return {
    matchedAt: hit.siblingIndex,
    before,
    after: first?.type === 'text' && first.value?.trim() === '' ? after.slice(1) : after,
  };
}

// Split `children` around the marker at `at`, which lives inside
// `children[at.nodeIndex]`'s text value. The marker itself is dropped; empty
// text remainders are not emitted.
function splitTextAt(children: RichTextNode[], at: MarkerHit): { before: RichTextNode[]; after: RichTextNode[] } {
  const child = children[at.nodeIndex]!;
  const value = child.type === 'text' && child.value !== undefined ? child.value : '';
  const beforeText = value.substring(0, at.start);
  const afterText = value.substring(at.end);

  const before: RichTextNode[] = [...children.slice(0, at.nodeIndex)];
  if (beforeText) before.push({ type: 'text', value: beforeText });

  const after: RichTextNode[] = [];
  if (afterText) after.push({ type: 'text', value: afterText });
  after.push(...children.slice(at.nodeIndex + 1));

  return { before, after };
}

// Looks through the direct text children of `node` for the first opener
// whose closer is NOT in the same block. Returns the raw and normalized tag,
// the decoded attrs, and the block's content on either side of the opener;
// null when the block has no such opener.
function findOpenerInBlock(node: RichTextNode): {
  rawTag: string;
  tagName: string;
  attrs: Record<string, unknown>;
  beforeNodes: RichTextNode[];
  afterOpenerNodes: RichTextNode[];
} | null {
  if (!node.children) return null;

  for (let i = 0; i < node.children.length; i++) {
    const child = node.children[i];
    if (!child || child.type !== 'text' || !child.value) continue;

    const match = OPENER_REGEX.exec(child.value);
    if (!match) continue;
    const rawTag = match[1] ?? '';
    // OPENER_REGEX also matches closers, with `end` captured as the tag.
    if (rawTag === 'end') continue;

    // If a closer already lives in this same block, expandPaired would have
    // resolved the pair — skip and let the next iteration look elsewhere.
    const closerRegex = makeCloserRegex(rawTag);
    const afterOpener = child.value.substring(match.index + match[0].length);
    if (closerRegex.test(afterOpener)) continue;
    if (node.children.slice(i + 1).some(s => s.type === 'text' && s.value !== undefined && closerRegex.test(s.value))) {
      continue;
    }

    const { tagName, attrs } = normalizeLegacy(rawTag, match[2] ?? '');
    const beforeText = child.value.substring(0, match.index);

    const beforeNodes: RichTextNode[] = [...node.children.slice(0, i)];
    if (beforeText) beforeNodes.push({ type: 'text', value: beforeText });

    const afterOpenerNodes: RichTextNode[] = [];
    // Whitespace-only text right after the opener is dropped.
    if (afterOpener.trim()) afterOpenerNodes.push({ type: 'text', value: afterOpener });
    afterOpenerNodes.push(...node.children.slice(i + 1));

    return { rawTag, tagName, attrs, beforeNodes, afterOpenerNodes };
  }
  return null;
}

// Creates a node, omitting `data` and `children` when they are empty.
function buildNode(type: string, data: Record<string, unknown>, children: RichTextNode[]): RichTextNode {
  const node: RichTextNode = { type };
  if (Object.keys(data).length > 0) node.data = data;
  if (children.length > 0) node.children = children;
  return node;
}

// ---- Legacy alias normalization ----
//
// Production content (sync-worker era, AsanaFormatter, etc.) emits a few
// sentinel forms that don't match modern conventions: positional args
// (`【 highlighted #ff0000 】` instead of `color="#ff0000"`) and pre-RFC tag
// spellings (`header N` for `headingN`, `highlighted` for `highlight`).
// Decoder accepts both shapes; encoder always emits the modern form.

interface LegacyMapping {
  type: string;
  positionalArg?: string;
}

const LEGACY_TAG_MAP: Record<string, LegacyMapping> = {
  highlight: { type: 'highlight', positionalArg: 'color' },
  highlighted: { type: 'highlight', positionalArg: 'color' },
  header: { type: 'heading', positionalArg: 'level' },
  code: { type: 'code', positionalArg: 'lang' },
};

// Resolves the tag and attrs of an opener marker.
//
// - `key=value` args: every pair is parsed into `attrs`; a legacy tag is
//   renamed to its modern type, except `header`, which keeps its raw tag.
// - positional args on a legacy tag: the first whitespace-separated word
//   becomes the mapped attribute, or the `headingN` suffix for `header`.
// - anything else: the raw tag with no attrs.
function normalizeLegacy(rawTag: string, rawArgs: string): { tagName: string; attrs: Record<string, unknown> } {
  // Own-property lookup only: the tag comes from document text, and a tag
  // such as `constructor` must not resolve to an inherited member.
  const mapping = Object.prototype.hasOwnProperty.call(LEGACY_TAG_MAP, rawTag) ? LEGACY_TAG_MAP[rawTag] : undefined;
  const attrs: Record<string, unknown> = {};
  const trimmed = rawArgs.trim();

  if (trimmed && /^\w+=/.test(trimmed)) {
    const tagName = mapping && mapping.positionalArg !== 'level' ? mapping.type : rawTag;
    // Value is a JSON literal: quoted string, true/false/null, or number
    // (optionally with an exponent, e.g. `1e+21`, so such numbers are not
    // truncated at the `e`); JSON.parse recovers the typed value. A
    // quoted-string value carrying textual `true` / `42` decodes as the
    // string, not the typed primitive.
    const attrRegex = /(\w+)=("[^"]*"|true|false|null|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)/g;
    for (const match of rawArgs.matchAll(attrRegex)) {
      const key = match[1];
      const raw = match[2];
      if (key === undefined || raw === undefined) continue;
      try {
        attrs[key] = JSON.parse(raw) as unknown;
      } catch {
        // Best effort: keep the raw literal when it is not valid JSON.
        attrs[key] = raw;
      }
    }
    return { tagName, attrs };
  }

  if (!mapping) return { tagName: rawTag, attrs };

  const positionalArg = mapping.positionalArg;
  const firstPart = trimmed ? trimmed.split(/\s+/)[0] : undefined;
  if (positionalArg && firstPart) {
    if (positionalArg === 'level') {
      return { tagName: `heading${firstPart}`, attrs };
    }
    attrs[positionalArg] = firstPart;
  }
  return { tagName: mapping.type, attrs };
}