]]>
2publishpage
`;
const result = await parseWxr(createStream(wxr));
expect(result.posts).toHaveLength(1);
expect(result.posts[0]?.title).toBe("About Us");
expect(result.posts[0]?.postType).toBe("page");
});
it("parses attachments", async () => {
  // Attachment items are expected in `attachments` and must not show up in `posts`.
  const fixture = `
Test Image10attachmenthttps://example.com/wp-content/uploads/2024/01/test.jpg`;
  const { posts, attachments } = await parseWxr(createStream(fixture));

  expect(posts).toHaveLength(0);
  expect(attachments).toHaveLength(1);

  // Field-level checks on the single parsed attachment.
  const image = attachments[0];
  expect(image?.id).toBe(10);
  expect(image?.title).toBe("Test Image");
  expect(image?.url).toContain("test.jpg");
});
it("parses categories", async () => {
  const fixture = `
1uncategorized2newsuncategorized`;
  const { categories } = await parseWxr(createStream(fixture));

  expect(categories).toHaveLength(2);

  // The second category's `parent` is expected to equal the first one's nicename.
  const firstCategory = categories[0];
  const secondCategory = categories[1];
  expect(firstCategory?.nicename).toBe("uncategorized");
  expect(firstCategory?.name).toBe("Uncategorized");
  expect(secondCategory?.parent).toBe("uncategorized");
});
it("parses tags", async () => {
  const fixture = `
5javascript`;
  const { tags } = await parseWxr(createStream(fixture));

  expect(tags).toHaveLength(1);

  // The tag exposes both a slug and a display name.
  const onlyTag = tags[0];
  expect(onlyTag?.slug).toBe("javascript");
  expect(onlyTag?.name).toBe("JavaScript");
});
it("parses post categories and tags", async () => {
  const fixture = `
Tagged Postpost`;
  const parsed = await parseWxr(createStream(fixture));
  const taggedPost = parsed.posts[0];

  // Per-post taxonomy assignments: one category and two tags.
  expect(taggedPost?.categories).toContain("news");
  expect(taggedPost?.tags).toContain("javascript");
  expect(taggedPost?.tags).toContain("typescript");
});
it("parses authors", async () => {
  const fixture = `
1adminadmin@example.comAdminUser`;
  const { authors } = await parseWxr(createStream(fixture));

  expect(authors).toHaveLength(1);

  // Login, email and display name are all surfaced on the author record.
  const admin = authors[0];
  expect(admin?.login).toBe("admin");
  expect(admin?.email).toBe("admin@example.com");
  expect(admin?.displayName).toBe("Administrator");
});
it("parses post meta", async () => {
  const fixture = `
Post with Metapost_yoast_wpseo_titleSEO Title_yoast_wpseo_metadescSEO Description`;
  const parsed = await parseWxr(createStream(fixture));
  const post = parsed.posts[0];

  // `meta` is looked up by key via `.get`; both SEO entries must survive parsing.
  expect(post?.meta.get("_yoast_wpseo_title")).toBe("SEO Title");
  expect(post?.meta.get("_yoast_wpseo_metadesc")).toBe("SEO Description");
});
it("handles empty WXR", async () => {
  const fixture = `
Empty Site`;
  const { posts, attachments, categories } = await parseWxr(createStream(fixture));

  // An export without items yields empty collections rather than failing.
  expect(posts).toHaveLength(0);
  expect(attachments).toHaveLength(0);
  expect(categories).toHaveLength(0);
});
it("parses page hierarchy (post_parent and menu_order)", async () => {
  const fixture = `
Parent Page10page01Child Page11page102`;
  const { posts } = await parseWxr(createStream(fixture));

  expect(posts).toHaveLength(2);

  // First page: postParent 0, menuOrder 1.
  const parentPage = posts[0];
  expect(parentPage?.postParent).toBe(0);
  expect(parentPage?.menuOrder).toBe(1);

  // Second page: postParent 10, menuOrder 2.
  const childPage = posts[1];
  expect(childPage?.postParent).toBe(10);
  expect(childPage?.menuOrder).toBe(2);
});
it("parses generic wp:term elements (custom taxonomies)", async () => {
  const fixture = `
100genresci-fi101genrefantasysci-fi`;
  const { terms } = await parseWxr(createStream(fixture));

  expect(terms).toHaveLength(2);

  // Full field check on the first term.
  const sciFi = terms[0];
  expect(sciFi?.id).toBe(100);
  expect(sciFi?.taxonomy).toBe("genre");
  expect(sciFi?.slug).toBe("sci-fi");
  expect(sciFi?.name).toBe("Science Fiction");
  expect(sciFi?.description).toBe("Science fiction books");

  // The second term's `parent` is expected to equal the first term's slug.
  const fantasy = terms[1];
  expect(fantasy?.parent).toBe("sci-fi");
});
it("parses nav_menu terms and nav_menu_item posts into structured menus", async () => {
  const fixture = `
5nav_menumain-menuHome50nav_menu_item1_menu_item_typecustom_menu_item_urlhttps://example.com/_menu_item_menu_item_parent0About51nav_menu_item2_menu_item_typepost_type_menu_item_objectpage_menu_item_object_id10_menu_item_menu_item_parent0`;
  const parsed = await parseWxr(createStream(fixture));

  // The nav_menu term still appears in the flat terms list.
  const hasNavMenuTerm = parsed.terms.some((term) => term.taxonomy === "nav_menu");
  expect(hasNavMenuTerm).toBe(true);

  // The nav_menu_item entries still appear in the posts list.
  const menuItemPosts = parsed.posts.filter((post) => post.postType === "nav_menu_item");
  expect(menuItemPosts).toHaveLength(2);

  // The structured view: exactly one menu holding two items.
  expect(parsed.navMenus).toHaveLength(1);
  const mainMenu = parsed.navMenus[0];
  expect(mainMenu?.name).toBe("main-menu");
  expect(mainMenu?.id).toBe(5);
  expect(mainMenu?.items).toHaveLength(2);

  // Items are expected in menu_order: the custom link first...
  const homeItem = mainMenu?.items[0];
  expect(homeItem?.title).toBe("Home");
  expect(homeItem?.type).toBe("custom");
  expect(homeItem?.url).toBe("https://example.com/");
  expect(homeItem?.sortOrder).toBe(1);

  // ...followed by the item that points at a page object.
  const aboutItem = mainMenu?.items[1];
  expect(aboutItem?.title).toBe("About");
  expect(aboutItem?.type).toBe("post_type");
  expect(aboutItem?.objectType).toBe("page");
  expect(aboutItem?.objectId).toBe(10);
  expect(aboutItem?.sortOrder).toBe(2);
});
it("parses custom taxonomy assignments on posts", async () => {
  const fixture = `
Book Review1post`;
  const parsed = await parseWxr(createStream(fixture));
  const review = parsed.posts[0];

  // Built-in categories stay on `categories`.
  expect(review?.categories).toContain("reviews");

  // Other taxonomies are looked up by taxonomy name on `customTaxonomies`.
  const genres = review?.customTaxonomies?.get("genre");
  expect(genres).toContain("sci-fi");
  expect(genres).toContain("dystopian");

  const readingLevels = review?.customTaxonomies?.get("reading_level");
  expect(readingLevels).toContain("advanced");
});
describe("multilingual plugin metadata (issue #1080)", () => {
  it("promotes WPML _icl_lang_code and trid to locale + translationGroup", async () => {
    const fixture = `
Hello1posthello_icl_lang_code_icl_translation_idMərhəba2posthello_icl_lang_code_icl_translation_id`;
    const { posts } = await parseWxr(createStream(fixture));

    expect(posts).toHaveLength(2);

    const first = posts[0];
    const second = posts[1];
    expect(first?.locale).toBe("en");
    expect(second?.locale).toBe("ar");

    // The two translations must resolve to one group key. Its prefix is
    // opaque but stable, so the execute route can group on it.
    expect(first?.translationGroup).toBe(second?.translationGroup);
    expect(first?.translationGroup).toContain("42");
  });

  it("falls back to WPML legacy `trid` meta key when _icl_translation_id is absent", async () => {
    const fixture = `
Legacy1post_icl_lang_codetrid`;
    const parsed = await parseWxr(createStream(fixture));
    const legacyPost = parsed.posts[0];

    expect(legacyPost?.locale).toBe("fr");
    expect(legacyPost?.translationGroup).toContain("7");
  });

  it("derives locale from Polylang's `language` taxonomy when WPML is absent", async () => {
    const fixture = `
Bonjour1post`;
    const parsed = await parseWxr(createStream(fixture));
    const polylangPost = parsed.posts[0];

    expect(polylangPost?.locale).toBe("fr");
  });

  it("derives a stable Polylang translationGroup from _translations meta", async () => {
    // Polylang keeps `_translations` as a serialized PHP map. The post
    // IDs are hashed into a key that every member of the group shares.
    // The key's exact format is not part of the contract; the single
    // guarantee is "same map -> same key".
    const fixture = `
EN1post_translationsFR2post_translations`;
    const { posts } = await parseWxr(createStream(fixture));

    expect(posts).toHaveLength(2);

    const english = posts[0];
    const french = posts[1];
    expect(english?.locale).toBe("en");
    expect(french?.locale).toBe("fr");
    expect(english?.translationGroup).toBeDefined();
    expect(english?.translationGroup).toBe(french?.translationGroup);
  });

  it("leaves locale/translationGroup undefined on monolingual exports", async () => {
    const fixture = `
Mono1post`;
    const parsed = await parseWxr(createStream(fixture));
    const monoPost = parsed.posts[0];

    expect(monoPost?.locale).toBeUndefined();
    expect(monoPost?.translationGroup).toBeUndefined();
  });

  it("prefers WPML over Polylang when both are present", async () => {
    const fixture = `
Conflict1post_icl_lang_code`;
    const parsed = await parseWxr(createStream(fixture));
    const conflictingPost = parsed.posts[0];

    expect(conflictingPost?.locale).toBe("en");
  });

  it("handles Polylang _translations payloads with multibyte string keys", async () => {
    // In PHP's `s:LEN:"..."`, LEN is the payload's BYTE count, not its
    // character count. A payload such as `é` is 2 bytes in UTF-8 but
    // moves a JS UTF-16 position by only 1. The parser therefore has to
    // advance by bytes; otherwise it misaligns and loses the integer
    // tokens that follow.
    //
    // Real-world trigger: any non-ASCII locale code or label string in
    // `_translations`. Polylang is not expected to emit such keys, but
    // if it does the group must not be corrupted.
    const fixture = `
A1post_translationsB2post_translations`;
    const { posts } = await parseWxr(createStream(fixture));

    expect(posts).toHaveLength(2);

    // Both ids are extracted: the key carries 1 AND 2, not only 2.
    const first = posts[0];
    const second = posts[1];
    expect(first?.translationGroup).toBe("pll:1,2");
    expect(second?.translationGroup).toBe("pll:1,2");
  });

  it("ignores `i:N;` literals embedded inside Polylang `_translations` string values", async () => {
    // A string value may itself contain `i:N;` text, which a naive regex
    // would wrongly pick up. The length-aware parser walks each
    // `s:LEN:"..."` block and skips exactly its payload. The genuine
    // translation IDs are `i:1;` and `i:7;`; the `i:99;` sitting inside
    // a string value must NOT contribute.
    const fixture = `
A1post_translationsB2post_translations`;
    const { posts } = await parseWxr(createStream(fixture));

    expect(posts).toHaveLength(2);

    const firstGroup = posts[0]?.translationGroup;
    const secondGroup = posts[1]?.translationGroup;
    expect(firstGroup).toBe(secondGroup);

    // The key is built from the integer post IDs `[1, 7]`, never from
    // the embedded `99`.
    expect(firstGroup).toBe("pll:1,7");
  });

  it("falls back to _icl_translation_id when trid is absent", async () => {
    // `trid` is the preferred source (it is the shared translation group
    // id), yet legacy / partial exports may carry only
    // `_icl_translation_id`. Accepting it as a fallback lets single-
    // translation exports still receive a group key.
    //
    // NOTE(review): another test in this suite is titled as if `trid`
    // were the fallback for `_icl_translation_id`, i.e. the opposite
    // precedence to this title -- confirm against the parser which key
    // actually wins when both are present.
    const fixture = `
Solo1post_icl_lang_code_icl_translation_id`;
    const parsed = await parseWxr(createStream(fixture));
    const soloPost = parsed.posts[0];

    expect(soloPost?.locale).toBe("en");
    expect(soloPost?.translationGroup).toBe("wpml:99");
  });

  it("captures per-item category text body as a taxonomyLabels entry", async () => {
    const fixture = `
Hello1post`;
    const parsed = await parseWxr(createStream(fixture));
    const labels = parsed.posts[0]?.taxonomyLabels;

    expect(labels).toBeDefined();

    // Keys take the form `${normalisedTaxonomy}\u0000${slug}`; `post_tag`
    // normalises to `tag`.
    expect(labels?.get("category\u0000breaking-news")).toBe("Breaking News");
    expect(labels?.get("tag\u0000featured")).toBe("Featured");
    expect(labels?.get("genre\u0000sci-fi")).toBe("Science Fiction");
  });
});
});