/**
 * Gutenberg to Portable Text Converter
 *
 * Converts WordPress Gutenberg block content to Portable Text format.
 * Uses @wordpress/block-serialization-default-parser to parse the hybrid
 * HTML+JSON format that WordPress uses.
 */

import { parse } from "@wordpress/block-serialization-default-parser";

import { parseInlineContent } from "./inline.js";
import { getTransformer } from "./transformers/index.js";
import type {
  GutenbergBlock,
  PortableTextBlock,
  ConvertOptions,
  TransformContext,
} from "./types.js";

// Regex patterns for HTML parsing and conversion.
//
// NOTE(review): this file had been HTML-stripped and entity-decoded at some
// point, which removed the leading `<img` / `<a` / `<li` / `<code` /
// `<figcaption` fragments from the tag patterns and turned the entity
// patterns into their decoded characters (e.g. `/&lt;/g` became `/</g`).
// The patterns below are reconstructed from the constant names and the
// surviving regex tails — confirm against version control.

// One top-level element: a paired block tag (group 1 = tag name, group 2 =
// inner HTML), a void <hr>/<br> (group 3), or a bare <img>.
const BLOCK_ELEMENT_PATTERN =
  /<(p|h[1-6]|blockquote|pre|ul|ol|figure|div|hr)[^>]*>([\s\S]*?)<\/\1>|<(hr|br)\s*\/?>|<img[^>]+\/?>/gu;
// An <img> wrapped in a link: group 1 = href, group 2 = the img attributes.
const LINKED_IMAGE_PATTERN =
  /<a[^>]*href=["']([^"']+)["'][^>]*>\s*<img([^>]+)\/?>\s*<\/a>/gu;
const STANDALONE_IMAGE_PATTERN = /<img[^>]+\/?>/gu;
const IMG_TAG_PATTERN = /<img[^>]+>/i;
const SRC_ATTR_PATTERN = /src=["']([^"']+)["']/i;
const ALT_ATTR_PATTERN = /alt=["']([^"']*)["']/i;
const LIST_ITEM_PATTERN = /<li[^>]*>([\s\S]*?)<\/li>/gu;
const CODE_TAG_PATTERN = /<code[^>]*>([\s\S]*?)<\/code>/i;
const HTML_TAG_PATTERN = /<[^>]+>/g;
const FIGCAPTION_TAG_PATTERN = /<figcaption[^>]*>([\s\S]*?)<\/figcaption>/i;

// HTML entity patterns (named, decimal and hex spellings).
const AMP_ENTITY_PATTERN = /&amp;/g;
const LESS_THAN_ENTITY_PATTERN = /&lt;/g;
const GREATER_THAN_ENTITY_PATTERN = /&gt;/g;
const QUOTE_ENTITY_PATTERN = /&quot;/g;
// NOTE(review): the original spelling is not recoverable; this accepts
// `&#39;`, `&#039;` and `&apos;`.
const APOS_ENTITY_PATTERN = /&(?:#0?39|apos);/g;
const NUMERIC_AMP_ENTITY_PATTERN = /&#0?38;/g;
const HEX_AMP_ENTITY_PATTERN = /&#x26;/gi;
const NBSP_ENTITY_PATTERN = /&nbsp;/g;

// Re-export types
export type {
  GutenbergBlock,
  PortableTextBlock,
  PortableTextTextBlock,
  PortableTextImageBlock,
  PortableTextCodeBlock,
  PortableTextEmbedBlock,
  PortableTextGalleryBlock,
  PortableTextColumnsBlock,
  PortableTextBreakBlock,
  PortableTextHtmlBlock,
  PortableTextButtonBlock,
  PortableTextButtonsBlock,
  PortableTextCoverBlock,
  PortableTextFileBlock,
  PortableTextPullquoteBlock,
  PortableTextSpan,
  PortableTextMarkDef,
  ConvertOptions,
  BlockTransformer,
  TransformContext,
} from "./types.js";

// Re-export transformers for customization
export { defaultTransformers, fallbackTransformer } from "./transformers/index.js";
export * as coreTransformers from "./transformers/core.js";
export * as embedTransformers from "./transformers/embed.js";

// Re-export inline utilities
export {
  parseInlineContent,
  extractText,
  extractAlt,
  extractCaption,
  extractSrc,
} from "./inline.js";

/**
 * Default key generator.
 *
 * Each generator keeps its own counter, so keys from one generator are
 * numbered 1, 2, 3, … in call order.
 *
 * @returns A function producing `key-<counter>-<suffix>`, where `<suffix>` is
 *   up to five base-36 characters taken from `Math.random()`.
 */
function createKeyGenerator(): () => string {
  let counter = 0;
  return () => {
    counter++;
    return `key-${counter}-${Math.random().toString(36).substring(2, 7)}`;
  };
}

/**
 * Normalize parsed blocks from the WP parser into our GutenbergBlock type.
 * The WP parser returns `attrs: Record<string, unknown> | null`, so we
 * normalize null attrs to empty objects and recursively process innerBlocks.
 *
 * @param blocks - Blocks as returned by the WP parser's `parse`
 * @returns The same tree with every `attrs` guaranteed to be an object
 */
function normalizeBlocks(blocks: ReturnType<typeof parse>): GutenbergBlock[] {
  return blocks.map(
    (block): GutenbergBlock => ({
      blockName: block.blockName,
      // `satisfies` checks the shape without widening or asserting the type.
      attrs: (block.attrs ?? {}) satisfies Record<string, unknown>,
      innerHTML: block.innerHTML,
      innerBlocks: normalizeBlocks(block.innerBlocks),
      innerContent: block.innerContent,
    }),
  );
}

/**
 * Convert WordPress Gutenberg content to Portable Text
 *
 * @param content - WordPress post content (HTML with Gutenberg block comments)
 * @param options - Conversion options
 * @returns Array of Portable Text blocks
 *
 * @example
 * ```ts
 * const portableText = gutenbergToPortableText(`
 *
 *

Hello world!

* * `); * // → [{ _type: "block", style: "normal", children: [...] }] * ``` */ export function gutenbergToPortableText( content: string, options: ConvertOptions = {}, ): PortableTextBlock[] { // Handle empty content if (!content || !content.trim()) { return []; } // Check if content has Gutenberg blocks const hasBlocks = content.includes("