/**
 * External dependencies
 */
import { describe, expect, it } from '@jest/globals';

/**
 * Internal dependencies
 */
import {
	htmlIndexToRichTextOffset,
	richTextOffsetToHtmlIndex,
} from '../crdt-utils';

/*
 * Shared fixtures.
 *
 * The index arithmetic documented in the tests below relies on the exact
 * length of each tag: "<strong>" is 8 characters, "</strong>" is 9,
 * "<em>" is 4, "</em>" is 5 and '<a href="https://example.com">' is 30.
 */

// Formatted word at the very start; visible text is "bold text".
const BOLD_AT_START_HTML = '<strong>bold</strong> text';

// Formatted word in the middle; visible text is "some words test" (15 chars).
const BOLD_IN_MIDDLE_HTML = 'some <strong>words</strong> test';

// Two formats wrapping the same character; visible text is "abc".
const NESTED_HTML = 'a<strong><em>b</em></strong>c';

// Opening tag carrying an attribute; visible text is "link text".
const LINK_HTML = '<a href="https://example.com">link</a> text';

// Two separate formatted ranges; visible text is "abcde".
const MULTIPLE_RANGES_HTML = 'a<strong>b</strong>c<em>d</em>e';

// Named and numeric character references.
const AMP_ENTITY_HTML = 'Tom &amp; Jerry';
const LT_ENTITY_HTML = 'a &lt; b';
const NUMERIC_ENTITY_HTML = 'a&#38;b';

describe( 'htmlIndexToRichTextOffset', () => {
	it( 'returns the index unchanged when there are no tags', () => {
		expect( htmlIndexToRichTextOffset( 'hello world', 5 ) ).toBe( 5 );
	} );

	it( 'returns 0 for index 0', () => {
		expect( htmlIndexToRichTextOffset( BOLD_AT_START_HTML, 0 ) ).toBe( 0 );
	} );

	it( 'skips a simple opening tag', () => {
		// "<strong>bold</strong> text"
		// HTML index 8 = first char of "bold" (right after <strong>)
		expect( htmlIndexToRichTextOffset( BOLD_AT_START_HTML, 8 ) ).toBe( 0 );
	} );

	it( 'counts text characters inside a tag', () => {
		// "<strong>bold</strong> text"
		// HTML index 10 = 'l' in "bold" (after "bo")
		// Text: "bold text", offset should be 2
		expect( htmlIndexToRichTextOffset( BOLD_AT_START_HTML, 10 ) ).toBe( 2 );
	} );

	it( 'skips closing tags', () => {
		// "<strong>bold</strong> text"
		// HTML index 21 = ' ' (the space after </strong>)
		// Text: "bold text", offset should be 4
		expect( htmlIndexToRichTextOffset( BOLD_AT_START_HTML, 21 ) ).toBe( 4 );
	} );

	it( 'handles text before a tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 3 = 'e' in "some"
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 3 ) ).toBe( 3 );
	} );

	it( 'handles an index right before a tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 5 = '<' of <strong>
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 5 ) ).toBe( 5 );
	} );

	it( 'handles an index right after an opening tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 13 = 'w' of "words" (right after <strong>)
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 13 ) ).toBe( 5 );
	} );

	it( 'handles an index at the end of formatted text', () => {
		// "some <strong>words</strong> test"
		// HTML index 18 = '<' of </strong>
		// Text offset should be 10 ("some words")
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 18 ) ).toBe(
			10
		);
	} );

	it( 'handles an index after the closing tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 27 = ' ' after </strong>
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 27 ) ).toBe(
			10
		);
	} );

	it( 'handles the end of the string', () => {
		const html = BOLD_IN_MIDDLE_HTML;
		expect( htmlIndexToRichTextOffset( html, html.length ) ).toBe( 15 );
	} );

	it( 'handles nested tags', () => {
		// "a<strong><em>b</em></strong>c"
		// Text: "abc"
		// HTML index 13 = 'b' (after <strong><em>)
		expect( htmlIndexToRichTextOffset( NESTED_HTML, 13 ) ).toBe( 1 );
		// HTML index 28 = 'c' (after </em></strong>)
		expect( htmlIndexToRichTextOffset( NESTED_HTML, 28 ) ).toBe( 2 );
	} );

	it( 'handles tags with attributes', () => {
		// '<a href="https://example.com">link</a> text'
		// HTML index 30 = 'l' in "link"
		expect( htmlIndexToRichTextOffset( LINK_HTML, 30 ) ).toBe( 0 );
	} );

	it( 'handles HTML entity &amp;', () => {
		// "Tom &amp; Jerry"
		// Text: "Tom & Jerry" (11 chars)
		// HTML index 4 = '&' start of &amp;
		expect( htmlIndexToRichTextOffset( AMP_ENTITY_HTML, 4 ) ).toBe( 4 );
		// HTML index 9 = ' ' after &amp;
		expect( htmlIndexToRichTextOffset( AMP_ENTITY_HTML, 9 ) ).toBe( 5 );
	} );

	it( 'handles HTML entity &lt;', () => {
		// "a &lt; b"
		// Text: "a < b" (5 chars)
		// HTML index 2 = '&' start of &lt;
		expect( htmlIndexToRichTextOffset( LT_ENTITY_HTML, 2 ) ).toBe( 2 );
		// HTML index 6 = ' ' after &lt;
		expect( htmlIndexToRichTextOffset( LT_ENTITY_HTML, 6 ) ).toBe( 3 );
	} );

	it( 'handles numeric character references', () => {
		// "a&#38;b" (&#38; = &)
		// Text: "a&b" (3 chars)
		// HTML index 1 = '&' start of &#38;
		expect( htmlIndexToRichTextOffset( NUMERIC_ENTITY_HTML, 1 ) ).toBe( 1 );
		// HTML index 6 = 'b'
		expect( htmlIndexToRichTextOffset( NUMERIC_ENTITY_HTML, 6 ) ).toBe( 2 );
	} );

	// These tests document the behavior when htmlIndex lands inside an
	// HTML tag or entity, possible from non-synced peers. The marker is
	// inserted at the raw index, which may break the HTML, but create()
	// produces a best-effort parse. Pinning the current behavior here so
	// any future changes are intentional.
	it( 'handles an htmlIndex pointing inside an opening tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 7 = 't' inside <strong>
		// The marker breaks the tag, so create() treats the broken
		// fragments as text. The marker position in the resulting
		// (corrupted) text happens to equal the raw htmlIndex.
		// toBe() uses Object.is, so it also proves the result is a number.
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 7 ) ).toBe( 7 );
	} );

	it( 'handles an htmlIndex pointing inside a closing tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 20 = 's' inside </strong>
		// Same as above, the broken closing tag becomes text.
		expect( htmlIndexToRichTextOffset( BOLD_IN_MIDDLE_HTML, 20 ) ).toBe(
			20
		);
	} );

	it( 'handles an htmlIndex pointing inside an entity', () => {
		// "Tom &amp; Jerry"
		// HTML index 6 = 'm' inside &amp;
		// The broken entity is not parsed, so the raw text including
		// the marker is preserved and the position equals htmlIndex.
		expect( htmlIndexToRichTextOffset( AMP_ENTITY_HTML, 6 ) ).toBe( 6 );
	} );

	it( 'handles self-closing tags like <br />', () => {
		// "line1<br />line2"
		// Gutenberg's rich-text treats <br /> as a line separator character,
		// so text = "line1\u2028line2" (11 chars). HTML index 11 = 'l' of
		// "line2" → rich-text offset 6 (after "line1" + line separator).
		expect( htmlIndexToRichTextOffset( 'line1<br />line2', 11 ) ).toBe( 6 );
	} );

	it( 'handles multiple adjacent tags', () => {
		// "<strong><em>text</em></strong>"
		// HTML index 12 = 't' (after <strong><em>)
		expect(
			htmlIndexToRichTextOffset( '<strong><em>text</em></strong>', 12 )
		).toBe( 0 );
	} );

	it( 'handles empty content', () => {
		expect( htmlIndexToRichTextOffset( '', 0 ) ).toBe( 0 );
	} );

	it( 'handles tag attribute containing ">" inside quotes', () => {
		// '<a title="a>b">link</a>'
		// The DOM parser correctly handles > inside quoted attributes.
		// HTML index 15 = 'l' in "link", rich-text offset = 0.
		const html = '<a title="a>b">link</a>';
		expect( htmlIndexToRichTextOffset( html, 15 ) ).toBe( 0 );
	} );
} );

describe( 'richTextOffsetToHtmlIndex', () => {
	it( 'returns the offset unchanged when there are no tags', () => {
		expect( richTextOffsetToHtmlIndex( 'hello world', 5 ) ).toBe( 5 );
	} );

	it( 'returns position after the opening tag for offset 0 with tags', () => {
		// Rich-text offset 0 = 'b' → HTML index 8 (after <strong>)
		expect( richTextOffsetToHtmlIndex( BOLD_AT_START_HTML, 0 ) ).toBe( 8 );
	} );

	it( 'maps offset inside a formatted word', () => {
		// "some <strong>words</strong> test"
		// Rich-text offset 5 = 'w' → HTML index 13 (after <strong>)
		expect( richTextOffsetToHtmlIndex( BOLD_IN_MIDDLE_HTML, 5 ) ).toBe( 13 );
	} );

	it( 'maps offset at the middle of a formatted word', () => {
		// Rich-text offset 7 = 'r' in "words" → HTML index 15
		expect( richTextOffsetToHtmlIndex( BOLD_IN_MIDDLE_HTML, 7 ) ).toBe( 15 );
	} );

	it( 'maps offset right after a formatted word', () => {
		// Rich-text offset 10 = ' ' after "words" → HTML index 27 (after </strong>)
		expect( richTextOffsetToHtmlIndex( BOLD_IN_MIDDLE_HTML, 10 ) ).toBe(
			27
		);
	} );

	it( 'maps offset before any tags', () => {
		// Rich-text offset 3 = 'e' in "some"
		expect( richTextOffsetToHtmlIndex( BOLD_IN_MIDDLE_HTML, 3 ) ).toBe( 3 );
	} );

	it( 'maps offset at end of string', () => {
		const html = BOLD_IN_MIDDLE_HTML;
		// Rich-text offset 15 = end of "some words test"
		expect( richTextOffsetToHtmlIndex( html, 15 ) ).toBe( html.length );
	} );

	it( 'handles nested formatting', () => {
		// "a<strong><em>b</em></strong>c"
		// Rich-text offset 1 = 'b' → HTML index 13
		expect( richTextOffsetToHtmlIndex( NESTED_HTML, 1 ) ).toBe( 13 );
	} );

	it( 'handles tags with attributes', () => {
		// '<a href="https://example.com">link</a> text'
		// Rich-text offset 0 = 'l' → HTML index 30
		expect( richTextOffsetToHtmlIndex( LINK_HTML, 0 ) ).toBe( 30 );
	} );

	it( 'is the inverse of htmlIndexToRichTextOffset for text positions', () => {
		const html = BOLD_IN_MIDDLE_HTML;
		const textPositions = [ 0, 3, 5, 7, 10, 15 ];
		for ( const textOffset of textPositions ) {
			const htmlIndex = richTextOffsetToHtmlIndex( html, textOffset );
			const roundTripped = htmlIndexToRichTextOffset( html, htmlIndex );
			expect( roundTripped ).toBe( textOffset );
		}
	} );

	it( 'handles empty string', () => {
		expect( richTextOffsetToHtmlIndex( '', 0 ) ).toBe( 0 );
	} );

	it( 'handles HTML entity &amp;', () => {
		// "Tom &amp; Jerry"
		// Text: "Tom & Jerry" (11 chars)
		// Rich-text offset 4 = '&' → HTML index 4 (start of &amp;)
		expect( richTextOffsetToHtmlIndex( AMP_ENTITY_HTML, 4 ) ).toBe( 4 );
		// Rich-text offset 5 = ' ' after '&' → HTML index 9 (after &amp;)
		expect( richTextOffsetToHtmlIndex( AMP_ENTITY_HTML, 5 ) ).toBe( 9 );
	} );

	it( 'handles HTML entity &lt;', () => {
		// "a &lt; b"
		// Text: "a < b" (5 chars)
		// Rich-text offset 2 = '<' → HTML index 2 (start of &lt;)
		expect( richTextOffsetToHtmlIndex( LT_ENTITY_HTML, 2 ) ).toBe( 2 );
		// Rich-text offset 3 = ' ' after '<' → HTML index 6 (after &lt;)
		expect( richTextOffsetToHtmlIndex( LT_ENTITY_HTML, 3 ) ).toBe( 6 );
	} );

	it( 'handles numeric character references', () => {
		// "a&#38;b" (&#38; = &)
		// Text: "a&b" (3 chars)
		// Rich-text offset 1 = '&' → HTML index 1 (start of &#38;)
		expect( richTextOffsetToHtmlIndex( NUMERIC_ENTITY_HTML, 1 ) ).toBe( 1 );
		// Rich-text offset 2 = 'b' → HTML index 6 (after &#38;)
		expect( richTextOffsetToHtmlIndex( NUMERIC_ENTITY_HTML, 2 ) ).toBe( 6 );
	} );

	it( 'handles multiple formatted ranges', () => {
		// "a<strong>b</strong>c<em>d</em>e"
		// Text: "abcde"
		// Offset 1 = 'b' → HTML index 9 (after <strong>)
		expect( richTextOffsetToHtmlIndex( MULTIPLE_RANGES_HTML, 1 ) ).toBe( 9 );
		// Offset 3 = 'd' → HTML index 24 (after <em>)
		expect( richTextOffsetToHtmlIndex( MULTIPLE_RANGES_HTML, 3 ) ).toBe(
			24
		);
	} );
} );

describe( 'round-trip consistency', () => {
	// Each entry is [ label used in the test title, HTML fixture ].
	const testCases: [ string, string ][] = [
		[ 'plain text', 'hello world' ],
		[ 'single bold', BOLD_IN_MIDDLE_HTML ],
		[ 'nested formatting', 'a<strong>b<em>c</em>d</strong>e' ],
		[ 'link with attributes', LINK_HTML ],
		[ 'multiple ranges', MULTIPLE_RANGES_HTML ],
		[ 'adjacent tags', '<strong>a</strong><em>b</em>' ],
		[ 'entity &amp;', AMP_ENTITY_HTML ],
		[ 'entity &lt;', LT_ENTITY_HTML ],
		[ 'numeric entity &#38;', NUMERIC_ENTITY_HTML ],
	];

	for ( const [ label, html ] of testCases ) {
		it( `round-trips all text positions for: ${ label }`, () => {
			// The rich-text offset of the end of the HTML string is the
			// total text length, i.e. the largest valid text offset.
			const totalTextLen = htmlIndexToRichTextOffset( html, html.length );

			for (
				let textOffset = 0;
				textOffset <= totalTextLen;
				textOffset++
			) {
				const htmlIndex = richTextOffsetToHtmlIndex( html, textOffset );
				const roundTripped = htmlIndexToRichTextOffset(
					html,
					htmlIndex
				);

				// Compare as objects so a failure message reports which
				// fixture, offset and intermediate HTML index went wrong.
				expect( {
					label,
					textOffset,
					htmlIndex,
					roundTripped,
				} ).toMatchObject( {
					label,
					textOffset,
					roundTripped: textOffset,
				} );
			}
		} );
	}
} );