/**
* External dependencies
*/
import { describe, expect, it } from '@jest/globals';
/**
* Internal dependencies
*/
import {
htmlIndexToRichTextOffset,
richTextOffsetToHtmlIndex,
} from '../crdt-utils';
/**
 * Tests for htmlIndexToRichTextOffset( html, htmlIndex ).
 *
 * Each case passes an HTML string together with a character index into that
 * string and asserts the rich-text (tag-free, entity-decoded) offset that is
 * returned. The index arithmetic in the comments relies on these lengths:
 * `<strong>` = 8, `</strong>` = 9, `<em>` = 4, `</em>` = 5, `&amp;` = 5,
 * `&lt;` = 4, `&#38;` = 5 and `<br />` = 6 characters.
 */
describe( 'htmlIndexToRichTextOffset', () => {
	it( 'returns the index unchanged when there are no tags', () => {
		expect( htmlIndexToRichTextOffset( 'hello world', 5 ) ).toBe( 5 );
	} );

	it( 'returns 0 for index 0', () => {
		expect(
			htmlIndexToRichTextOffset( '<strong>bold</strong> text', 0 )
		).toBe( 0 );
	} );

	it( 'skips a simple opening tag', () => {
		// "<strong>bold</strong> text"
		// HTML index 8 = first char of "bold" (right after <strong>)
		expect(
			htmlIndexToRichTextOffset( '<strong>bold</strong> text', 8 )
		).toBe( 0 );
	} );

	it( 'counts text characters inside a tag', () => {
		// "<strong>bold</strong> text"
		// HTML index 10 = 'l' in "bold" (after "bo")
		// Text: "bold text", offset should be 2
		expect(
			htmlIndexToRichTextOffset( '<strong>bold</strong> text', 10 )
		).toBe( 2 );
	} );

	it( 'skips closing tags', () => {
		// "<strong>bold</strong> text"
		// HTML index 21 = ' ' (the space after </strong>)
		// Text: "bold text", offset should be 4
		expect(
			htmlIndexToRichTextOffset( '<strong>bold</strong> text', 21 )
		).toBe( 4 );
	} );

	it( 'handles text before a tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 3 = 'e' in "some"
		expect(
			htmlIndexToRichTextOffset( 'some <strong>words</strong> test', 3 )
		).toBe( 3 );
	} );

	it( 'handles an index right before a tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 5 = '<' of <strong>
		expect(
			htmlIndexToRichTextOffset( 'some <strong>words</strong> test', 5 )
		).toBe( 5 );
	} );

	it( 'handles an index right after an opening tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 13 = 'w' of "words" (right after <strong>)
		expect(
			htmlIndexToRichTextOffset( 'some <strong>words</strong> test', 13 )
		).toBe( 5 );
	} );

	it( 'handles an index at the end of formatted text', () => {
		// "some <strong>words</strong> test"
		// HTML index 18 = '<' of </strong>
		// Text offset should be 10 ("some words")
		expect(
			htmlIndexToRichTextOffset( 'some <strong>words</strong> test', 18 )
		).toBe( 10 );
	} );

	it( 'handles an index after the closing tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 27 = ' ' after </strong>
		expect(
			htmlIndexToRichTextOffset( 'some <strong>words</strong> test', 27 )
		).toBe( 10 );
	} );

	it( 'handles the end of the string', () => {
		const html = 'some <strong>words</strong> test';
		expect( htmlIndexToRichTextOffset( html, html.length ) ).toBe( 15 );
	} );

	it( 'handles nested tags', () => {
		// "<strong>a<em>b</em></strong>c"
		// Text: "abc"
		// HTML index 13 = 'b' (after <strong>a<em>)
		expect(
			htmlIndexToRichTextOffset( '<strong>a<em>b</em></strong>c', 13 )
		).toBe( 1 );
		// HTML index 28 = 'c' (after </em></strong>)
		expect(
			htmlIndexToRichTextOffset( '<strong>a<em>b</em></strong>c', 28 )
		).toBe( 2 );
	} );

	it( 'handles tags with attributes', () => {
		// '<a href="https://example.com">link</a> text'
		// HTML index 30 = 'l' in "link"
		expect(
			htmlIndexToRichTextOffset(
				'<a href="https://example.com">link</a> text',
				30
			)
		).toBe( 0 );
	} );

	it( 'handles HTML entity &amp;', () => {
		// "Tom &amp; Jerry"
		// Text: "Tom & Jerry" (11 chars)
		// HTML index 4 = '&' start of &amp;
		expect( htmlIndexToRichTextOffset( 'Tom &amp; Jerry', 4 ) ).toBe( 4 );
		// HTML index 9 = ' ' after &amp;
		expect( htmlIndexToRichTextOffset( 'Tom &amp; Jerry', 9 ) ).toBe( 5 );
	} );

	it( 'handles HTML entity &lt;', () => {
		// "a &lt; b"
		// Text: "a < b" (5 chars)
		// HTML index 2 = '&' start of &lt;
		expect( htmlIndexToRichTextOffset( 'a &lt; b', 2 ) ).toBe( 2 );
		// HTML index 6 = ' ' after &lt;
		expect( htmlIndexToRichTextOffset( 'a &lt; b', 6 ) ).toBe( 3 );
	} );

	it( 'handles numeric character references', () => {
		// "a&#38;b" (&#38; = &)
		// Text: "a&b" (3 chars)
		// HTML index 1 = '&' start of &#38;
		expect( htmlIndexToRichTextOffset( 'a&#38;b', 1 ) ).toBe( 1 );
		// HTML index 6 = 'b'
		expect( htmlIndexToRichTextOffset( 'a&#38;b', 6 ) ).toBe( 2 );
	} );

	// These tests document the behavior when htmlIndex lands inside an
	// HTML tag or entity, possible from non-synced peers. The marker is
	// inserted at the raw index, which may break the HTML, but create()
	// produces a best-effort parse. Pinning the current behavior here so
	// any future changes are intentional.
	it( 'handles an htmlIndex pointing inside an opening tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 7 = 't' inside <strong>
		// The marker breaks the tag, so create() treats the broken
		// fragments as text. The marker position in the resulting
		// (corrupted) text happens to equal the raw htmlIndex.
		const result = htmlIndexToRichTextOffset(
			'some <strong>words</strong> test',
			7
		);
		expect( typeof result ).toBe( 'number' );
		expect( result ).toBe( 7 );
	} );

	it( 'handles an htmlIndex pointing inside a closing tag', () => {
		// "some <strong>words</strong> test"
		// HTML index 20 = 's' inside </strong>
		// Same as above, the broken closing tag becomes text.
		const result = htmlIndexToRichTextOffset(
			'some <strong>words</strong> test',
			20
		);
		expect( typeof result ).toBe( 'number' );
		expect( result ).toBe( 20 );
	} );

	it( 'handles an htmlIndex pointing inside an entity', () => {
		// "Tom &amp; Jerry"
		// HTML index 6 = 'm' inside &amp;
		// The broken entity is not parsed, so the raw text including
		// the marker is preserved and the position equals htmlIndex.
		const result = htmlIndexToRichTextOffset( 'Tom &amp; Jerry', 6 );
		expect( typeof result ).toBe( 'number' );
		expect( result ).toBe( 6 );
	} );

	it( 'handles self-closing tags like <br />', () => {
		// "line1<br />line2"
		// Gutenberg's rich-text treats <br> as a line separator character,
		// so text = "line1\u2028line2" (11 chars). HTML index 11 = 'l' of
		// "line2" → rich-text offset 6 (after "line1" + line separator).
		expect( htmlIndexToRichTextOffset( 'line1<br />line2', 11 ) ).toBe( 6 );
	} );

	it( 'handles multiple adjacent tags', () => {
		// "<strong><em>text</em></strong>"
		// HTML index 12 = 't' (after <strong><em>)
		expect(
			htmlIndexToRichTextOffset( '<strong><em>text</em></strong>', 12 )
		).toBe( 0 );
	} );

	it( 'handles empty content', () => {
		expect( htmlIndexToRichTextOffset( '', 0 ) ).toBe( 0 );
	} );

	it( 'handles tag attribute containing ">" inside quotes', () => {
		// '<a title="a>b">link</a>'
		// The DOM parser correctly handles > inside quoted attributes.
		// HTML index 15 = 'l' in "link", rich-text offset = 0.
		const html = '<a title="a>b">link</a>';
		const result = htmlIndexToRichTextOffset( html, 15 );
		expect( result ).toBe( 0 );
	} );
} );
/**
 * Tests for richTextOffsetToHtmlIndex( html, richTextOffset ).
 *
 * Each case passes an HTML string together with an offset into its rich-text
 * (tag-free, entity-decoded) form and asserts the index into the raw HTML
 * string that is returned. The expectations below place that index after any
 * tags that precede the addressed character, and at the start of an entity
 * when the addressed character is the decoded entity itself.
 */
describe( 'richTextOffsetToHtmlIndex', () => {
	it( 'returns the offset unchanged when there are no tags', () => {
		expect( richTextOffsetToHtmlIndex( 'hello world', 5 ) ).toBe( 5 );
	} );

	it( 'returns position after the opening tag for offset 0 with tags', () => {
		// "<strong>bold</strong> text"
		// Rich-text offset 0 = 'b' → HTML index 8 (after <strong>)
		expect(
			richTextOffsetToHtmlIndex( '<strong>bold</strong> text', 0 )
		).toBe( 8 );
	} );

	it( 'maps offset inside a formatted word', () => {
		// "some <strong>words</strong> test"
		// Rich-text offset 5 = 'w' → HTML index 13 (after <strong>)
		expect(
			richTextOffsetToHtmlIndex( 'some <strong>words</strong> test', 5 )
		).toBe( 13 );
	} );

	it( 'maps offset at the middle of a formatted word', () => {
		// "some <strong>words</strong> test"
		// Rich-text offset 7 = 'r' in "words" → HTML index 15
		expect(
			richTextOffsetToHtmlIndex( 'some <strong>words</strong> test', 7 )
		).toBe( 15 );
	} );

	it( 'maps offset right after a formatted word', () => {
		// "some <strong>words</strong> test"
		// Rich-text offset 10 = ' ' after "words" → HTML index 27 (after </strong>)
		expect(
			richTextOffsetToHtmlIndex( 'some <strong>words</strong> test', 10 )
		).toBe( 27 );
	} );

	it( 'maps offset before any tags', () => {
		// "some <strong>words</strong> test"
		// Rich-text offset 3 = 'e' in "some"
		expect(
			richTextOffsetToHtmlIndex( 'some <strong>words</strong> test', 3 )
		).toBe( 3 );
	} );

	it( 'maps offset at end of string', () => {
		const html = 'some <strong>words</strong> test';
		// Rich-text offset 15 = end of "some words test"
		expect( richTextOffsetToHtmlIndex( html, 15 ) ).toBe( html.length );
	} );

	it( 'handles nested formatting', () => {
		// "<strong>a<em>b</em></strong>c"
		// Rich-text offset 1 = 'b' → HTML index 13
		expect(
			richTextOffsetToHtmlIndex( '<strong>a<em>b</em></strong>c', 1 )
		).toBe( 13 );
	} );

	it( 'handles tags with attributes', () => {
		// '<a href="https://example.com">link</a> text'
		// Rich-text offset 0 = 'l' → HTML index 30
		expect(
			richTextOffsetToHtmlIndex(
				'<a href="https://example.com">link</a> text',
				0
			)
		).toBe( 30 );
	} );

	it( 'is the inverse of htmlIndexToRichTextOffset for text positions', () => {
		const html = 'some <strong>words</strong> test';
		// Offsets before, at the edges of, inside and after the bold run.
		const textPositions = [ 0, 3, 5, 7, 10, 15 ];
		for ( const textOffset of textPositions ) {
			const htmlIndex = richTextOffsetToHtmlIndex( html, textOffset );
			const roundTripped = htmlIndexToRichTextOffset( html, htmlIndex );
			expect( roundTripped ).toBe( textOffset );
		}
	} );

	it( 'handles empty string', () => {
		expect( richTextOffsetToHtmlIndex( '', 0 ) ).toBe( 0 );
	} );

	it( 'handles HTML entity &amp;', () => {
		// "Tom &amp; Jerry"
		// Text: "Tom & Jerry" (11 chars)
		// Rich-text offset 4 = '&' → HTML index 4 (start of &amp;)
		expect( richTextOffsetToHtmlIndex( 'Tom &amp; Jerry', 4 ) ).toBe( 4 );
		// Rich-text offset 5 = ' ' after '&' → HTML index 9 (after &amp;)
		expect( richTextOffsetToHtmlIndex( 'Tom &amp; Jerry', 5 ) ).toBe( 9 );
	} );

	it( 'handles HTML entity &lt;', () => {
		// "a &lt; b"
		// Text: "a < b" (5 chars)
		// Rich-text offset 2 = '<' → HTML index 2 (start of &lt;)
		expect( richTextOffsetToHtmlIndex( 'a &lt; b', 2 ) ).toBe( 2 );
		// Rich-text offset 3 = ' ' after '<' → HTML index 6 (after &lt;)
		expect( richTextOffsetToHtmlIndex( 'a &lt; b', 3 ) ).toBe( 6 );
	} );

	it( 'handles numeric character references', () => {
		// "a&#38;b" (&#38; = &)
		// Text: "a&b" (3 chars)
		// Rich-text offset 1 = '&' → HTML index 1 (start of &#38;)
		expect( richTextOffsetToHtmlIndex( 'a&#38;b', 1 ) ).toBe( 1 );
		// Rich-text offset 2 = 'b' → HTML index 6 (after &#38;)
		expect( richTextOffsetToHtmlIndex( 'a&#38;b', 2 ) ).toBe( 6 );
	} );

	it( 'handles multiple formatted ranges', () => {
		// "a<strong>b</strong>c<em>d</em>e"
		// Text: "abcde"
		// Offset 1 = 'b' → HTML index 9 (after a<strong>)
		expect(
			richTextOffsetToHtmlIndex( 'a<strong>b</strong>c<em>d</em>e', 1 )
		).toBe( 9 );
		// Offset 3 = 'd' → HTML index 24 (after c<em>)
		expect(
			richTextOffsetToHtmlIndex( 'a<strong>b</strong>c<em>d</em>e', 3 )
		).toBe( 24 );
	} );
} );
/**
 * Property-style check: for every rich-text offset of each fixture, converting
 * to an HTML index with richTextOffsetToHtmlIndex() and back with
 * htmlIndexToRichTextOffset() must return the starting offset.
 */
describe( 'round-trip consistency', () => {
	// [ label shown in the test title, HTML fixture ]
	const testCases: [ string, string ][] = [
		[ 'plain text', 'hello world' ],
		[ 'single bold', 'some <strong>words</strong> test' ],
		[ 'nested formatting', 'a<strong>b<em>c</em>d</strong>e' ],
		[
			'link with attributes',
			'<a href="https://example.com">link</a> text',
		],
		[ 'multiple ranges', 'a<strong>b</strong>c<em>d</em>e' ],
		[ 'adjacent tags', '<strong>a</strong><em>b</em>' ],
		[ 'entity &amp;', 'Tom &amp; Jerry' ],
		[ 'entity &lt;', 'a &lt; b' ],
		[ 'numeric entity &#38;', 'a&#38;b' ],
	];

	for ( const [ label, html ] of testCases ) {
		it( `round-trips all text positions for: ${ label }`, () => {
			// Converting the end-of-string HTML index yields the total
			// rich-text length, i.e. the largest valid text offset.
			const totalTextLen = htmlIndexToRichTextOffset( html, html.length );

			for (
				let textOffset = 0;
				textOffset <= totalTextLen;
				textOffset++
			) {
				const htmlIndex = richTextOffsetToHtmlIndex( html, textOffset );
				const roundTripped = htmlIndexToRichTextOffset(
					html,
					htmlIndex
				);

				// Compare as an object so a failure message reports the
				// fixture label, the offset and the intermediate HTML index.
				expect( {
					label,
					textOffset,
					htmlIndex,
					roundTripped,
				} ).toMatchObject( {
					label,
					textOffset,
					roundTripped: textOffset,
				} );
			}
		} );
	}
} );