/**
 * Escapes every character that has special meaning in a regular expression
 * so the returned text can be embedded in a pattern and matched literally.
 *
 * @param value Raw text to escape.
 * @returns The text with each regex metacharacter prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Collapses every run of whitespace (as defined by `\s`) into one space.
 *
 * @param value Text to normalize.
 * @returns The text with each whitespace run replaced by a single space.
 */
function normalizeWhitespace(value: string): string {
  return value.replace(/\s+/g, ' ');
}

/**
 * Builds a regex fragment that matches `value` literally, except that every
 * whitespace run in `value` is matched by `\s+` (one or more whitespace
 * characters of any kind).
 *
 * Leading or trailing whitespace in `value` produces an empty first or last
 * segment after the split, so it also becomes a required `\s+`.
 *
 * @param value Literal text to convert.
 * @returns A regex source fragment (not anchored, no capture groups).
 */
function buildFlexiblePattern(value: string): string {
  return normalizeWhitespace(value)
    .split(' ')
    .map((segment) => escapeRegExp(segment))
    .join('\\s+');
}

/**
 * Compiles a template such as `"Hello {name}, welcome to {*}"` into an
 * anchored regular expression.
 *
 * Capture group layout of the returned regex:
 *   - group 1: leading whitespace of the input,
 *   - groups 2..n+1: one group per `{placeholder}`, in template order,
 *   - last group: trailing whitespace of the input.
 *
 * @param sourceString Template text; `{...}` with at least one character
 *   between the braces is treated as a placeholder.
 * @param flexible When true, literal text is converted with
 *   `buildFlexiblePattern`; otherwise it is only escaped with `escapeRegExp`.
 * @returns The compiled regex and the trimmed placeholder names, in order.
 */
function buildTokenizedReplacement(
  sourceString: string,
  flexible: boolean
): { regex: RegExp; tokenNames: string[] } {
  // Split keeps placeholder names because the capturing group is retained,
  // so odd indices hold placeholder names and even indices hold literal text.
  const parts = sourceString.split(/\{([^}]+)\}/g);
  const toPattern = flexible ? buildFlexiblePattern : escapeRegExp;
  const tokenNames: string[] = [];
  let patternString = '^(\\s*)';

  parts.forEach((part, index) => {
    const isToken = index % 2 === 1;
    if (isToken) {
      tokenNames.push(part.trim());
      // Require at least one character (non-greedy). `.` is used without the
      // `s` flag, so a placeholder value cannot contain a line terminator.
      patternString += '(.+?)';
    } else if (part) {
      patternString += toPattern(part);
    }
  });

  patternString += '(\\s*)$';
  return { regex: new RegExp(patternString), tokenNames };
}

/**
 * Runs one manual test case: compiles `source` in flexible mode, matches it
 * against `input`, and reports the outcome on the console.
 *
 * @param label Heading printed before the result.
 * @param source Template passed to `buildTokenizedReplacement`.
 * @param input Text the compiled regex is matched against.
 * @param expectedTokens When given, the captured placeholder values are
 *   compared to this list position by position and PASS/FAIL is reported.
 */
function testMatch(
  label: string,
  source: string,
  input: string,
  expectedTokens?: string[]
): void {
  console.log(`\n--- ${label} ---`);
  const { regex, tokenNames } = buildTokenizedReplacement(source, true);
  const match = input.match(regex);
  // The line break after "Matched? " is part of the original message text.
  console.log(
    `Matched? \n${!!match} (Input: "${input.replace(/\n/g, '\\n').replace(/\u00A0/g, ' ')}")`
  );

  if (!match) {
    if (expectedTokens) {
      console.error(`Token extraction: FAIL (No match found)`);
    }
    return;
  }

  // Group 1 is the leading whitespace, so placeholder values start at index 2.
  const actualTokens: string[] = tokenNames.map((_, idx) => match[idx + 2] ?? '');

  // A Map is used for grouping so that placeholder names which collide with
  // Object.prototype members (e.g. "constructor") are handled like any other.
  const captured = new Map<string, string[]>();
  tokenNames.forEach((name, idx) => {
    // An all-whitespace placeholder trims to '', so fall back to a positional key.
    const key = name || `token${idx}`;
    const values = captured.get(key) ?? [];
    values.push(actualTokens[idx] ?? '');
    captured.set(key, values);
  });
  console.log('Captured tokens:', Object.fromEntries(captured));

  if (!expectedTokens) {
    return;
  }

  const pass =
    actualTokens.length === expectedTokens.length &&
    actualTokens.every((val, i) => val === expectedTokens[i]);
  if (pass) {
    console.log(`Token extraction: PASS (${JSON.stringify(actualTokens)})`);
  } else {
    console.error(
      `Token extraction: FAIL\n Expected: ${JSON.stringify(expectedTokens)}\n Actual: ${JSON.stringify(actualTokens)}`
    );
  }
}

// Case 1: HTML Entities
testMatch("Case 1: NBSP", "under Made in Webflow.", " under\u00A0Made in Webflow.");

// Case 2: Split nodes (Simulation)
// If the text is split, we only match against one chunk.
testMatch("Case 2: Split Start", "You can select up to {*} profile handles...", "You can select up to ");
testMatch("Case 2: Split End", "You can select up to {*} profile handles...", "3 profile handles...");

// Case 3: Newlines
testMatch("Case 3: Newline in input", "under Made in Webflow.", "\nunder Made in Webflow.");
testMatch("Case 3: Inter-word Newline", "under Made in Webflow.", "under\nMade in Webflow.");

// Case 4: Word boundary / punctuation
testMatch("Case 4: Punctuation", "Made in Webflow.", "Made in Webflow"); // Mismatch expected (missing dot)
testMatch("Case 4: Trailing s", "handle", "handles"); // Expected mismatch

// Case 5: Fuzzy Matching {*}
testMatch("Case 5: Multiple {*}", "Hello {*}, welcome to {*}", "Hello John, welcome to Webflow", ["John", "Webflow"]);