/** * Integration tests using WordPress Theme Unit Test data * * Tests the full WordPress migration pipeline against the official * WordPress Theme Unit Test dataset. The test data is downloaded from * GitHub on first run and cached locally. * * @see https://github.com/WordPress/theme-test-data */ import { createReadStream, existsSync } from "node:fs"; import { mkdir, writeFile } from "node:fs/promises"; import { dirname, join } from "node:path"; import { gutenbergToPortableText } from "@emdash-cms/gutenberg-to-portable-text"; import { describe, it, expect, beforeAll } from "vitest"; import { parseWxr } from "../../../src/cli/wxr/parser.js"; // Test regex patterns const PARAGRAPH_WITH_TEXT_REGEX = /]*>[^<]+<\/p>/; const TEST_DATA_PATH = join( process.cwd(), "../../examples/wp-theme-unit-test/themeunittestdata.wordpress.xml", ); const TEST_DATA_URL = "https://raw.githubusercontent.com/WordPress/theme-test-data/master/themeunittestdata.wordpress.xml"; /** * Download the WordPress theme unit test data if it doesn't exist locally. */ async function ensureTestData(): Promise { if (existsSync(TEST_DATA_PATH)) return; console.log(`Downloading WordPress theme unit test data from ${TEST_DATA_URL}...`); const response = await fetch(TEST_DATA_URL); if (!response.ok) { throw new Error(`Failed to download test data: ${response.status} ${response.statusText}`); } const data = await response.text(); await mkdir(dirname(TEST_DATA_PATH), { recursive: true }); await writeFile(TEST_DATA_PATH, data, "utf-8"); console.log(`Downloaded to ${TEST_DATA_PATH}`); } describe("WordPress Theme Unit Test Migration", () => { let wxrData: Awaited>; beforeAll(async () => { await ensureTestData(); const stream = createReadStream(TEST_DATA_PATH, { encoding: "utf-8" }); wxrData = await parseWxr(stream); }); describe("WXR Parsing", () => { it("parses site metadata", () => { expect(wxrData.site.title).toBe("Theme Unit Test Data"); expect(wxrData.site.link).toBe("https://wpthemetestdata.wordpress.com"); expect(wxrData.site.language).toBe("en"); }); it("parses all posts", () => { // Theme Unit Test has many posts covering different scenarios expect(wxrData.posts.length).toBeGreaterThan(50); }); it("parses all pages", () => { const pages = wxrData.posts.filter((p) => p.postType === "page"); expect(pages.length).toBeGreaterThan(10); }); it("parses categories with hierarchy", () => { expect(wxrData.categories.length).toBeGreaterThan(20); // Check for parent-child relationships const parentCategory = wxrData.categories.find((c) => c.nicename === "parent-category"); expect(parentCategory).toBeDefined(); const childCategory = wxrData.categories.find((c) => c.nicename === "child-category-01"); expect(childCategory).toBeDefined(); expect(childCategory?.parent).toBe("parent-category"); }); it("parses tags", () => { expect(wxrData.tags.length).toBeGreaterThan(50); // Check for specific tags const wpTag = wxrData.tags.find((t) => t.slug === "wordpress"); expect(wpTag).toBeDefined(); expect(wpTag?.name).toBe("WordPress"); }); it("parses authors", () => { expect(wxrData.authors.length).toBeGreaterThanOrEqual(1); const author = wxrData.authors.find((a) => a.login === "themereviewteam"); expect(author).toBeDefined(); expect(author?.displayName).toBe("Theme Reviewer"); }); it("parses attachments", () => { expect(wxrData.attachments.length).toBeGreaterThan(0); }); it("parses post categories and tags", () => { // Find a post with both categories and tags const postsWithTaxonomies = wxrData.posts.filter( (p) => p.categories.length > 0 || p.tags.length > 0, ); expect(postsWithTaxonomies.length).toBeGreaterThan(0); }); }); describe("Gutenberg Block Conversion", () => { it("converts paragraph blocks", () => { const post = wxrData.posts.find((p) => p.content?.includes("wp:paragraph")); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); expect(result.length).toBeGreaterThan(0); const block = result.find((b) => b._type === "block"); expect(block).toBeDefined(); }); it("converts heading blocks with different levels", () => { const post = wxrData.posts.find((p) => p.title === "WP 6.1 Font size scale"); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); // Should have h2 headings const headings = result.filter( (b) => b._type === "block" && (b as any).style?.startsWith("h"), ); expect(headings.length).toBeGreaterThan(0); }); it("converts list blocks", () => { // Find a post with list content const post = wxrData.posts.find((p) => p.content?.includes("wp:list")); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); const listItems = result.filter((b) => b._type === "block" && (b as any).listItem); expect(listItems.length).toBeGreaterThan(0); }); it("converts image blocks", () => { const post = wxrData.posts.find((p) => p.content?.includes("wp:image")); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); const images = result.filter((b) => b._type === "image"); expect(images.length).toBeGreaterThan(0); }); it("converts quote blocks", () => { const post = wxrData.posts.find((p) => p.content?.includes("wp:quote")); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); const quotes = result.filter((b) => b._type === "block" && (b as any).style === "blockquote"); expect(quotes.length).toBeGreaterThan(0); }); it("converts code blocks", () => { const post = wxrData.posts.find((p) => p.content?.includes("wp:code")); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); const codeBlocks = result.filter((b) => b._type === "code"); expect(codeBlocks.length).toBeGreaterThan(0); }); it("converts group blocks by flattening", () => { const post = wxrData.posts.find((p) => p.content?.includes("wp:group")); expect(post).toBeDefined(); const result = gutenbergToPortableText(post!.content || ""); // Groups should be flattened - no group type in output const groups = result.filter((b) => b._type === "group"); expect(groups.length).toBe(0); // But their content should still be present expect(result.length).toBeGreaterThan(0); }); it("handles classic editor content (no block markers)", () => { // A classic-category post with no Gutenberg block comments, so this // genuinely exercises the classic-HTML fallback path in the converter // rather than passing on whatever happens to be in the category. const classicPost = wxrData.posts.find( (p) => p.categories.includes("classic") && !!p.content?.trim() && !p.content.includes("

Test

https://www.youtube.com/watch?v=abc123
`; const result = gutenbergToPortableText(content); const embeds = result.filter((b) => b._type === "embed"); expect(embeds.length).toBeGreaterThan(0); }); }); describe("Content Integrity", () => { it("preserves all text content through conversion", () => { // Take a sample of posts and verify text isn't lost const samplePosts = wxrData.posts.slice(0, 10); let assertedCount = 0; for (const post of samplePosts) { if (!post.content) continue; const result = gutenbergToPortableText(post.content); // Extract all text from result const extractedText = result .map((block) => { if (block._type === "block" && (block as any).children) { return (block as any).children.map((c: any) => c.text || "").join(""); } if (block._type === "code") { return (block as any).code || ""; } return ""; }) .join(" ") .trim(); // If there was content, we should have extracted some text // (unless it was all images/embeds) if (post.content.includes("

") || post.content.includes("wp:paragraph")) { // Only check if there was actual text content const hasTextContent = PARAGRAPH_WITH_TEXT_REGEX.test(post.content); if (hasTextContent) { expect(extractedText.length).toBeGreaterThan(0); assertedCount++; } } } // Guard against the loop silently asserting nothing. expect(assertedCount).toBeGreaterThan(0); }); }); describe("Statistics", () => { it("reports conversion statistics", () => { let totalPosts = 0; let successfulConversions = 0; let failedConversions = 0; let totalBlocks = 0; const blockTypes = new Map(); for (const post of wxrData.posts) { totalPosts++; try { const result = gutenbergToPortableText(post.content || ""); successfulConversions++; totalBlocks += result.length; for (const block of result) { const type = block._type; blockTypes.set(type, (blockTypes.get(type) || 0) + 1); } } catch { failedConversions++; } } // Log statistics (visible in test output with --reporter=verbose) console.log("\n=== WordPress Migration Statistics ==="); console.log(`Total posts: ${totalPosts}`); console.log(`Successful: ${successfulConversions}`); console.log(`Failed: ${failedConversions}`); console.log(`Total blocks generated: ${totalBlocks}`); console.log("\nBlock types:"); for (const [type, count] of blockTypes.entries()) { console.log(` ${type}: ${count}`); } console.log("=====================================\n"); // All conversions should succeed expect(failedConversions).toBe(0); expect(successfulConversions).toBe(totalPosts); }); }); });