{"version":3,"file":"accent-normalization.cjs","sources":["../../../src/utils/accent-normalization.ts"],"sourcesContent":["/**\n * Accent Normalization Utilities\n * Removes diacritics and accents from text for better matching\n */\n\n/**\n * Comprehensive accent/diacritic mapping\n * Maps accented characters to their base forms\n */\nconst ACCENT_MAP: Record<string, string> = {\n  // Latin Extended-A\n  à: \"a\",\n  á: \"a\",\n  â: \"a\",\n  ã: \"a\",\n  ä: \"a\",\n  å: \"a\",\n  ā: \"a\",\n  ă: \"a\",\n  ą: \"a\",\n  À: \"A\",\n  Á: \"A\",\n  Â: \"A\",\n  Ã: \"A\",\n  Ä: \"A\",\n  Å: \"A\",\n  Ā: \"A\",\n  Ă: \"A\",\n  Ą: \"A\",\n\n  è: \"e\",\n  é: \"e\",\n  ê: \"e\",\n  ë: \"e\",\n  ē: \"e\",\n  ĕ: \"e\",\n  ė: \"e\",\n  ę: \"e\",\n  ě: \"e\",\n  È: \"E\",\n  É: \"E\",\n  Ê: \"E\",\n  Ë: \"E\",\n  Ē: \"E\",\n  Ĕ: \"E\",\n  Ė: \"E\",\n  Ę: \"E\",\n  Ě: \"E\",\n\n  ì: \"i\",\n  í: \"i\",\n  î: \"i\",\n  ï: \"i\",\n  ĩ: \"i\",\n  ī: \"i\",\n  ĭ: \"i\",\n  į: \"i\",\n  Ì: \"I\",\n  Í: \"I\",\n  Î: \"I\",\n  Ï: \"I\",\n  Ĩ: \"I\",\n  Ī: \"I\",\n  Ĭ: \"I\",\n  Į: \"I\",\n\n  'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ø': 'o', 'ō': 'o', 'ŏ': 'o', 'ő': 'o',\n  'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö': 'O', 'Ø': 'O', 'Ō': 'O', 'Ŏ': 'O', 'Ő': 'O',\n\n  'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ũ': 'u', 'ū': 'u', 'ŭ': 'u', 'ů': 'u', 'ű': 'u', 'ų': 'u',\n  'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ũ': 'U', 'Ū': 'U', 'Ŭ': 'U', 'Ů': 'U', 'Ű': 'U', 'Ų': 'U',\n\n  ý: \"y\",\n  ÿ: \"y\",\n  ŷ: \"y\",\n  Ý: \"Y\",\n  Ÿ: \"Y\",\n  Ŷ: \"Y\",\n\n  ñ: \"n\",\n  ń: \"n\",\n  ņ: \"n\",\n  ň: \"n\",\n  Ñ: \"N\",\n  Ń: \"N\",\n  Ņ: \"N\",\n  Ň: \"N\",\n\n  ç: \"c\",\n  ć: \"c\",\n  ĉ: \"c\",\n  ċ: \"c\",\n  č: \"c\",\n  Ç: \"C\",\n  Ć: \"C\",\n  Ĉ: \"C\",\n  Ċ: \"C\",\n  Č: \"C\",\n\n  ß: \"ss\", // German sharp s\n\n  ð: \"d\",\n  đ: \"d\",\n  Ð: \"D\",\n  Đ: \"D\",\n\n  ĝ: \"g\",\n  ğ: \"g\",\n  ġ: \"g\",\n  ģ: \"g\",\n  Ĝ: \"G\",\n  Ğ: \"G\",\n  Ġ: \"G\",\n  Ģ: \"G\",\n\n  ĥ: \"h\",\n  ħ: \"h\",\n  Ĥ: \"H\",\n  Ħ: \"H\",\n\n  ĵ: \"j\",\n  Ĵ: \"J\",\n\n  ķ: \"k\",\n  Ķ: \"K\",\n\n  ĺ: \"l\",\n  ļ: \"l\",\n  ľ: \"l\",\n  ŀ: \"l\",\n  ł: \"l\",\n  Ĺ: \"L\",\n  Ļ: \"L\",\n  Ľ: \"L\",\n  Ŀ: \"L\",\n  Ł: \"L\",\n\n  ŕ: \"r\",\n  ŗ: \"r\",\n  ř: \"r\",\n  Ŕ: \"R\",\n  Ŗ: \"R\",\n  Ř: \"R\",\n\n  ś: \"s\",\n  ŝ: \"s\",\n  ş: \"s\",\n  š: \"s\",\n  Ś: \"S\",\n  Ŝ: \"S\",\n  Ş: \"S\",\n  Š: \"S\",\n\n  ţ: \"t\",\n  ť: \"t\",\n  ŧ: \"t\",\n  Ţ: \"T\",\n  Ť: \"T\",\n  Ŧ: \"T\",\n\n  ŵ: \"w\",\n  Ŵ: \"W\",\n\n  ź: \"z\",\n  ż: \"z\",\n  ž: \"z\",\n  Ź: \"Z\",\n  Ż: \"Z\",\n  Ž: \"Z\",\n\n  æ: \"ae\",\n  œ: \"oe\",\n  Æ: \"AE\",\n  Œ: \"OE\",\n\n  þ: \"th\",\n  Þ: \"TH\",\n};\n\n/**\n * Cache for accent removal results\n * Dramatically speeds up repeated accent normalization\n */\nconst accentCache = new Map<string, string>();\nconst MAX_CACHE_SIZE = 10000; // TODO: Adjust based on memory constraints\n\n/**\n * Remove accents and diacritics from a string\n * Uses both custom mapping and Unicode normalization with caching\n */\nexport function removeAccents(text: string): string {\n  if (!text) return text;\n\n  // Check cache first (massive speedup for repeated words)\n  const cached = accentCache.get(text);\n  if (cached !== undefined) {\n    return cached;\n  }\n\n  // OPTIMIZATION: Use array join instead of string concatenation\n  const chars: string[] = [];\n  for (let i = 0; i < text.length; i++) {\n    const char = text[i];\n    chars.push(ACCENT_MAP[char] || char);\n  }\n  let result = chars.join('');\n\n  // Second pass: Use Unicode normalization for any remaining accents\n  // NFD = Canonical Decomposition (separates base char from combining marks)\n  // Then remove combining diacritical marks (Unicode range \\u0300-\\u036f)\n  result = result.normalize(\"NFD\").replace(/[\\u0300-\\u036f]/g, \"\");\n\n  // Cache the result (with size limit)\n  if (accentCache.size < MAX_CACHE_SIZE) {\n    accentCache.set(text, result);\n  } else if (accentCache.size === MAX_CACHE_SIZE) {\n    // Clear cache when it gets too large (keep most recent)\n    accentCache.clear();\n    accentCache.set(text, result);\n  }\n\n  return result;\n}\n\n/**\n * Check if a string contains any accented characters\n * Optimized with early return\n */\nexport function hasAccents(text: string): boolean {\n  if (!text) return false;\n\n  // OPTIMIZATION: Check custom map first (fast path)\n  for (let i = 0; i < text.length; i++) {\n    if (ACCENT_MAP[text[i]]) {\n      return true;\n    }\n  }\n\n  // OPTIMIZATION: Only normalize if we didn't find accents in map\n  // Check for combining diacritical marks\n  return /[\\u0300-\\u036f]/.test(text.normalize(\"NFD\"));\n}\n\n/**\n * Normalize text for accent-insensitive comparison\n * Converts to lowercase and removes accents\n */\nexport function normalizeForComparison(text: string): string {\n  return removeAccents(text.toLowerCase());\n}\n\n/**\n * Create accent-insensitive variants of a word\n * Returns both original and accent-free version\n */\nexport function getAccentVariants(word: string): string[] {\n  const normalized = removeAccents(word);\n\n  // If word has accents, return both versions\n  if (normalized !== word) {\n    return [word, normalized];\n  }\n\n  // Otherwise just return original\n  return [word];\n}\n"],"names":[],"mappings":";;AASA,MAAM,aAAqC;AAAA;AAAA,EAEzC,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EACrF,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAErF,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAC/F,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAAK,KAAK;AAAA,EAE/F,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AACL;AAMA,MAAM,kCAAkB,IAAA;AACxB,MAAM,iBAAiB;AAMhB,SAAS,cAAc,MAAsB;AAClD,MAAI,CAAC,KAAM,QAAO;AAGlB,QAAM,SAAS,YAAY,IAAI,IAAI;AACnC,MAAI,WAAW,QAAW;AACxB,WAAO;AAAA,EACT;AAGA,QAAM,QAAkB,CAAA;AACxB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,OAAO,KAAK,CAAC;AACnB,UAAM,KAAK,WAAW,IAAI,KAAK,IAAI;AAAA,EACrC;AACA,MAAI,SAAS,MAAM,KAAK,EAAE;AAK1B,WAAS,OAAO,UAAU,KAAK,EAAE,QAAQ,oBAAoB,EAAE;AAG/D,MAAI,YAAY,OAAO,gBAAgB;AACrC,gBAAY,IAAI,MAAM,MAAM;AAAA,EAC9B,WAAW,YAAY,SAAS,gBAAgB;AAE9C,gBAAY,MAAA;AACZ,gBAAY,IAAI,MAAM,MAAM;AAAA,EAC9B;AAEA,SAAO;AACT;AAMO,SAAS,WAAW,MAAuB;AAChD,MAAI,CAAC,KAAM,QAAO;AAGlB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,QAAI,WAAW,KAAK,CAAC,CAAC,GAAG;AACvB,aAAO;AAAA,IACT;AAAA,EACF;AAIA,SAAO,kBAAkB,KAAK,KAAK,UAAU,KAAK,CAAC;AACrD;AAMO,SAAS,uBAAuB,MAAsB;AAC3D,SAAO,cAAc,KAAK,aAAa;AACzC;AAMO,SAAS,kBAAkB,MAAwB;AACxD,QAAM,aAAa,cAAc,IAAI;AAGrC,MAAI,eAAe,MAAM;AACvB,WAAO,CAAC,MAAM,UAAU;AAAA,EAC1B;AAGA,SAAO,CAAC,IAAI;AACd;;;;;"}