import { test, expect, describe } from 'vitest'
import { unified, Plugin } from 'unified'
import remarkMdx from 'remark-mdx'
import remarkStringify from 'remark-stringify'
import remarkParse from 'remark-parse'
import { parseHtmlToMdxAst } from 'safe-mdx/parse'
import { Root, RootContent } from 'mdast'
import { remark } from 'remark'
import { visit } from 'unist-util-visit'

// NOTE(review): several fixtures below look like their HTML markup was stripped
// before this file reached review (for example `html: ''` is paired with a
// snapshot that expects an `img` element). The strings are kept exactly as
// found; restore them from the upstream source before trusting the snapshots.

/**
 * Tagged template that joins its parts and dedents the result.
 *
 * The common leading whitespace of all non-blank lines is removed from every
 * line, then the whole string is trimmed.
 *
 * @param strings - literal parts of the template
 * @param expressions - interpolated values, stringified in place
 * @returns the dedented, trimmed text
 */
function html(
    strings: TemplateStringsArray,
    ...expressions: unknown[]
): string {
    // Interleave literal parts and interpolated values
    let raw = strings[0] ?? ''
    for (let i = 1; i < strings.length; i++) {
        // String() keeps the concatenation legal for `unknown` operands
        raw += String(expressions[i - 1])
        raw += strings[i]
    }

    const lines = raw.split('\n')

    // Blank and whitespace-only lines do not take part in the indent computation
    const indentLengths = lines
        .filter((line) => line.trim().length > 0)
        .map((line) => line.length - line.trimStart().length)
    const minIndent =
        indentLengths.length > 0 ? Math.min(...indentLengths) : 0

    // Drop the common indent from every line, then trim the outer whitespace
    return lines
        .map((line) => line.slice(minIndent))
        .join('\n')
        .trim()
}

/** `onError` handler shared by the tests: any reported error fails the test. */
const rethrow = (error: unknown): never => {
    throw error
}

/**
 * Parses `markdown` with remark, replaces every `html` node with the nodes
 * returned by `parseHtmlToMdxAst`, and stringifies the tree with remark-mdx.
 *
 * @param markdown - markdown source that may contain HTML
 * @param onError - forwarded to `parseHtmlToMdxAst`
 * @returns the stringified MDX document
 */
async function htmlToMdxString({
    markdown,
    onError,
}: {
    markdown: string
    onError?: (error: unknown, text: string) => void
}): Promise<string> {
    const remarkHtmlBlocks: Plugin<[], Root> = function () {
        return (tree: Root) => {
            visit(tree, (node, index, parent) => {
                if (
                    node.type !== 'html' ||
                    !parent ||
                    typeof index !== 'number'
                ) {
                    return undefined
                }

                const htmlValue = node.value as string

                // Parse HTML to MDX AST with processor for markdown parsing
                const mdxNodes = parseHtmlToMdxAst({
                    html: htmlValue,
                    onError,
                    textToMdast: ({ text: x }) => {
                        const processor = remark().use(() => {
                            return (tree, file) => {
                                file.data.ast = tree
                            }
                        })
                        const mdast = processor.parse(x) as any
                        processor.runSync(mdast)
                        return mdast
                    },
                    parentType: parent.type,
                })

                // One splice covers all cases: zero nodes removes the HTML
                // node, one node replaces it, several nodes expand it in place.
                parent.children.splice(index, 1, ...mdxNodes)

                // After a removal the next sibling now sits at `index`; return
                // it so the traversal does not step over that sibling.
                return mdxNodes.length === 0 ? index : undefined
            })
        }
    }

    const processor = remark().use(remarkHtmlBlocks).use(remarkStringify, {})
    const mdast = processor.parse(markdown)
    processor.runSync(mdast)

    return remark().use(remarkMdx).use(remarkStringify, {}).stringify(mdast)
}

describe('Notion-specific HTML to MDX', () => {
    test('converts page element to MDX with surrounding markdown', async () => {
        const htmlContent = html` Test Page `
        const markdown = ` # My Document ${htmlContent} Some text after the page element. `
        const mdxString = await htmlToMdxString({
            markdown,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "# My Document Test Page Some text after the page element. 
" `)
    })

    test('converts callout element to MDX with surrounding content', async () => {
        const htmlContent = html` Important note `
        const markdown = ` Here's an important message: ${htmlContent} **Bold text** after the callout. `
        const mdxString = await htmlToMdxString({
            markdown,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "Here's an important message: Important note **Bold text** after the callout. " `)
    })

    test('converts mention-page element to MDX with mixed content', async () => {
        const htmlContent = html` `
        const markdown = `Check out this page: ${htmlContent} for more information. - First item - Second item`
        const mdxString = await htmlToMdxString({
            markdown,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "Check out this page: for more information. * First item * Second item " `)
    })

    test('converts nested Notion elements to MDX', async () => {
        const htmlContent = html` Page 1 Some text Important callout `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " Page 1 Some text Important callout " `)
    })

    test('handles mixed HTML and Notion elements with surrounding markdown', async () => {
        const htmlContent = html`

Title

Test Page

Regular paragraph

`
        const markdown = `## Section Header ${htmlContent} And here's a [link](https://example.com) after the HTML block.`
        const mdxString = await htmlToMdxString({
            markdown,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "## Section Header

Title

Test Page

Regular paragraph

And here's a [link](https://example.com) after the HTML block. " `)
    })

    test('converts span with color attribute', async () => {
        const htmlContent = html` Blue text `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " Blue text " `)
    })

    test('handles table element conversion', async () => {
        const htmlContent = html`
Cell 1 Cell 2
`
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "
Cell 1 Cell 2
" `)
    })

    test('handles image element conversion', async () => {
        const htmlContent = html` Test image `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "Test image " `)
    })

    test('handles unknown element conversion', async () => {
        const htmlContent = html` `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " " `)
    })

    test('handles columns with content and surrounding markdown', async () => {
        const htmlContent = html`

Section 1

Page Link
Warning: Important information
`
        const markdown = `# Main Title Here's some introductory text before the columns. ${htmlContent} --- Footer text with **bold** and *italic*.`
        const mdxString = await htmlToMdxString({
            markdown,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "# Main Title Here's some introductory text before the columns.

Section 1

Page Link
Warning: Important information
*** Footer text with **bold** and *italic*. " `)
    })

    test('handles HTML wrappers around markdown content', async () => {
        // TODO if you do not add a new line after it gets all parsed as html!
        const markdown = html` ## GitHub/GitLab: Update issues with pull request actions The GitHub and GitLab integrations move issues from *In Progress* to *Done* automatically so you never have to update issues manually. It takes less than a minute to connect GitHub to the workspace and then go to team settings to configure the automatic updates. Read more in the detailed [documentation]({{/60b0cf80dbe0420faa1264a58da48bd2}}). ### ✨ProTip: Set personal GitHub preferences Configure these settings in Preferences under Account Settings. `
        const mdxString = await htmlToMdxString({
            markdown,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " ## GitHub/GitLab: Update issues with pull request actions The GitHub and GitLab integrations move issues from *In Progress* to *Done* automatically so you never have to update issues manually. It takes less than a minute to connect GitHub to the workspace and then go to team settings to configure the automatic updates. Read more in the detailed [documentation](\\{\\{/60b0cf80dbe0420faa1264a58da48bd2}}). ### ✨ProTip: Set personal GitHub preferences Configure these settings in Preferences under Account Settings. " `)
    })
})

describe('parseHtmlToMdxAst', () => {
    test('parses simple HTML element', () => {
        // Line breaks are written as \n escapes: a raw newline is not allowed
        // inside a single-quoted literal.
        const result = parseHtmlToMdxAst({ html: '\nHello\n' })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [], "children": [ { "type": "text", "value": "Hello", }, ], "name": "div", "type": "mdxJsxTextElement", }, ] `)
    })

    test('parses element without transforms (generic)', () => {
        const result = parseHtmlToMdxAst({
            html: 'Test Page',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "url", "type": "mdxJsxAttribute", "value": "{{https://notion.so/test}}", }, ], "children": [ { "type": "text", "value": "Test Page", }, ], "name": "page", "type": "mdxJsxTextElement", }, ] `)
    })

    test('parses Notion page element', () => {
        const result = parseHtmlToMdxAst({
            html: 'Test Page',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "url", "type": "mdxJsxAttribute", "value": "{{https://www.notion.so/test}}", }, ], "children": [ { "type": "text", "value": "Test Page", }, ], "name": "page", "type": "mdxJsxTextElement", }, ] `)
    })

    test('handles partial HTML - opening tag only', () => {
        const result = parseHtmlToMdxAst({ html: '\n' })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [], "children": [], "name": "div", "type": "mdxJsxTextElement", }, ] `)
    })

    test('handles partial HTML - closing tag only', () => {
        const result = parseHtmlToMdxAst({ html: '\n' })
        expect(result).toMatchInlineSnapshot(` [] `)
    })

    test('handles self-closing tags', () => {
        const result = parseHtmlToMdxAst({
            html: '',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "source", "type": "mdxJsxAttribute", "value": "{{https://example.com/img.jpg}}", }, ], "children": [], "name": "img", "type": "mdxJsxTextElement", }, ] `)
    })

    test('handles mention-page element', () => {
        const result = parseHtmlToMdxAst({
            html: '',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "url", "type": "mdxJsxAttribute", "value": "{{https://www.notion.so/test}}", }, ], "children": [], "name": "mention-page", "type": "mdxJsxTextElement", }, ] `)
    })

    test('handles callout with attributes', () => {
        const result = parseHtmlToMdxAst({
            html: 'Some text',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "icon", "type": "mdxJsxAttribute", "value": "📎", }, { "name": "color", "type": "mdxJsxAttribute", "value": "pink_bg", }, ], "children": [ { "type": "text", "value": "Some text", }, ], "name": "callout", "type": "mdxJsxTextElement", }, ] `)
    })

    test('handles span with color', () => {
        const result = parseHtmlToMdxAst({
            html: 'colored text',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "color", "type": "mdxJsxAttribute", "value": "blue", }, ], "children": [ { "type": "text", "value": "colored text", }, ], "name": "span", "type": "mdxJsxTextElement", }, ] `)
    })

    test('handles mixed content', () => {
        const result = parseHtmlToMdxAst({
            html: 'Some text Page more text',
        })
        expect(result).toMatchInlineSnapshot(` [ { "type": "text", "value": "Some text", }, { "attributes": [ { "name": "url", "type": "mdxJsxAttribute", "value": "{{https://notion.so/test}}", }, ], "children": [ { "type": "text", "value": "Page", }, ], "name": "page", "type": "mdxJsxTextElement", }, { "type": "text", "value": "more text", }, ] `)
    })

    test('handles comments', () => {
        const result = parseHtmlToMdxAst({ html: '' })
        expect(result).toMatchInlineSnapshot(`[]`)
    })

    test('handles table with attributes', () => {
        const result = parseHtmlToMdxAst({
            html: '\nCell\n',
        })
        expect(result).toMatchInlineSnapshot(` [ { "attributes": [ { "name": "header-row", "type": "mdxJsxAttribute", "value": "true", }, ], "children": [ { "attributes": [], "children": [ { "attributes": [], "children": [ { "type": "text", "value": "Cell", }, ], "name": "td", "type": "mdxJsxTextElement", }, ], "name": "tr", "type": "mdxJsxTextElement", }, ], "name": "table", "type": "mdxJsxTextElement", }, ] `)
    })
})

describe('parseHtmlToMdxAst without transforms (generic)', () => {
    test('preserves tag names without transform', () => {
        const result = parseHtmlToMdxAst({ html: 'Content' })
        expect(result[0]).toHaveProperty('name', 'page')
    })

    test('preserves curly brace URLs without transform', () => {
        const result = parseHtmlToMdxAst({
            html: 'Link',
        })
        expect(result[0]).toHaveProperty('attributes')
        const attrs = (result[0] as any).attributes
        expect(attrs[0]).toHaveProperty('value', '{{https://example.com}}')
    })
})

describe('parseHtmlToMdxAst with markdown processor', () => {
    test('parses markdown inside HTML tags', async () => {
        const htmlContent = html` This is **bold** text `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " This is **bold** text " `)
    })

    test('parses markdown links inside HTML', async () => {
        const htmlContent = html` [link](http://google.com) `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " [link](http://google.com) " `)
    })

    test('parses mixed markdown and HTML inside tags', async () => {
        const htmlContent = html` **Read next:** `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " **Read next:** " `)
    })

    test('handles bold inside span with underline', async () => {
        const htmlContent = html` **sdf dsf** `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " **sdf dsf** " `)
    })

    test('converts markdown inside callout to MDX string', async () => {
        const htmlContent = html` **Read next:** Some page `
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` " **Read next:** Some page " `)
    })

    test('handles markdown inside table cells', async () => {
        const htmlContent = html`
**Bold** text and [link](http://example.com)
`
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "
**Bold** text and [link](http://example.com)
" `)
    })

    test('preserves plain text when no markdown', async () => {
        const htmlContent = html`
Plain text without markdown
`
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "
Plain text without markdown
" `)
    })

    test('handles nested HTML tags with markdown', async () => {
        const htmlContent = html`
**Bold** and link
`
        const mdxString = await htmlToMdxString({
            markdown: htmlContent,
            onError: rethrow,
        })
        expect(mdxString).toMatchInlineSnapshot(` "
**Bold** and link
" `)
    })
})