{"version":3,"file":"utf8_support.mjs","sources":["../../src/utf8_support.ts"],"sourcesContent":["/**\n * Ensures a string is compatible with Prometheus' UTF-8 handling rules.\n *\n * Prometheus has specific rules for handling UTF-8 strings in metric names and label values:\n * - Legacy names (matching pattern [a-zA-Z_:][a-zA-Z0-9_:]*) are used as-is\n * - Non-legacy names containing UTF-8 characters must be wrapped in double quotes\n *\n * @param value - The string to make UTF-8 compatible\n * @returns The original string if it's empty or a valid legacy name, otherwise the string wrapped in double quotes\n *\n * @example\n * utf8Support('metric_name') // returns 'metric_name'\n * utf8Support('metric-📈') // returns '\"metric-📈\"'\n */\nexport const utf8Support = (value: string) => {\n  if (value === '') {\n    return value;\n  }\n  const isLegacyLabel = isValidLegacyName(value);\n  if (isLegacyLabel) {\n    return value;\n  }\n  return `\"${value}\"`;\n};\n\n/**\n * Escapes a string to make it compatible with Prometheus UTF-8 support.\n *\n * This function converts non-legacy name characters to an escaped format:\n * - Underscores are doubled as '__'\n * - Valid legacy runes are preserved as-is\n * - Invalid code points are replaced with '_FFFD_'\n * - Other characters are converted to '_HEX_' format where HEX is the hexadecimal code point\n *\n * @param value - The string to escape\n * @returns An escaped string prefixed with 'U__' that is compatible with Prometheus\n *\n * @example\n * escapeForUtf8Support(\"my lovely_http.status:sum\") // returns U__my_20_lovely__http_2e_status:sum\n * escapeForUtf8Support(\"label with 😱\") // returns U__label_20_with_20__1f631_\n */\nexport const escapeForUtf8Support = (value: string) => {\n  const isLegacyLabel = isValidLegacyName(value);\n  if (isLegacyLabel) {\n    return value;\n  }\n\n  let escaped = 'U__';\n\n  for (let i = 0; i < value.length; i++) {\n    const char = value[i];\n    const codePoint = value.codePointAt(i);\n\n    if (char === '_') {\n      escaped += '__';\n    } else if (codePoint !== undefined && isValidLegacyRune(char, i)) {\n      escaped += char;\n    } else if (codePoint === undefined || !isValidCodePoint(codePoint)) {\n      escaped += '_FFFD_';\n    } else {\n      escaped += '_';\n      escaped += codePoint.toString(16); // Convert code point to hexadecimal\n      escaped += '_';\n    }\n\n    // Handle surrogate pairs for characters outside the Basic Multilingual Plane\n    if (codePoint !== undefined && codePoint > 0xffff) {\n      i++; // Skip the second half of the surrogate pair\n    }\n  }\n\n  return escaped;\n};\n\n/**\n * Checks if a string is a valid legacy (the standard) Prometheus metric or label name.\n *\n * Valid legacy (the standard) names match the pattern [a-zA-Z_:][a-zA-Z0-9_:]* which means:\n * - First character must be a letter, underscore, or colon\n * - Remaining characters can only be letters, numbers, underscores, or colons\n *\n * @param name - The string to check\n * @returns true if the string is a valid legacy (the standard) name, false otherwise\n */\nexport const isValidLegacyName = (name: string): boolean => {\n  if (name.length === 0) {\n    return false;\n  }\n\n  for (let i = 0; i < name.length; i++) {\n    const char = name[i];\n    if (!isValidLegacyRune(char, i)) {\n      return false;\n    }\n  }\n\n  return true;\n};\n\n/**\n * Checks if a character is valid for a legacy (the standard) Prometheus metric or label name.\n *\n * This is an implementation of the Prometheus model rune validation logic, which\n * determines if a character is allowed in a legacy metric or label name.\n * https://github.com/prometheus/common/blob/v0.64.0/model/metric.go#L430-L432\n *\n * @param char - The character to check\n * @param index - The position of the character in the string\n * @returns true if the character is valid at the given position, false otherwise\n */\nconst isValidLegacyRune = (char: string, index: number): boolean => {\n  const codePoint = char.codePointAt(0);\n  if (codePoint === undefined) {\n    return false;\n  }\n\n  return (\n    (codePoint >= 97 && codePoint <= 122) || // 'a' to 'z'\n    (codePoint >= 65 && codePoint <= 90) || // 'A' to 'Z'\n    codePoint === 95 || // '_'\n    codePoint === 58 || // ':'\n    (codePoint >= 48 && codePoint <= 57 && index > 0) // '0' to '9', but not at the start\n  );\n};\n\n/**\n * Validates if a Unicode code point is valid for UTF-8 encoding.\n *\n * @param codePoint - The Unicode code point to validate\n * @returns true if the code point is valid (between 0 and 0x10FFFF), false otherwise\n */\nconst isValidCodePoint = (codePoint: number): boolean => {\n  return codePoint >= 0 && codePoint <= 0x10ffff;\n};\n\n/**\n * Wraps each key in a Prometheus filter string with UTF-8 support.\n *\n * This function processes a filter string (e.g. 'metric=\"value\",name=~\"pattern\"')\n * and applies UTF-8 support to each key while preserving the operators and values.\n * It handles quoted values and comma separators correctly.\n *\n * @param filterStr - The filter string to process\n * @returns A new filter string with UTF-8 support applied to the keys\n */\nexport const wrapUtf8Filters = (filterStr: string): string => {\n  const resultArray: string[] = [];\n  const operatorRegex = /(=~|!=|!~|=)/; // NOTE: the order of the operators is important here\n  let currentKey = '';\n  let currentValue = '';\n  let inQuotes = false;\n  let temp = '';\n  const addResult = () => {\n    const operatorMatch = temp.match(operatorRegex);\n    if (operatorMatch) {\n      const operator = operatorMatch[0];\n      [currentKey, currentValue] = temp.split(operator);\n      resultArray.push(`${utf8Support(currentKey.trim())}${operator}\"${currentValue.slice(1, -1)}\"`);\n    }\n  };\n\n  for (const char of filterStr) {\n    if (char === '\"' && temp[temp.length - 1] !== '\\\\') {\n      // Toggle inQuotes when an unescaped quote is found\n      inQuotes = !inQuotes;\n      temp += char;\n    } else if (char === ',' && !inQuotes) {\n      // When outside quotes and encountering ',', finalize the current pair\n      addResult();\n      temp = ''; // Reset for the next pair\n    } else {\n      // Collect characters\n      temp += char;\n    }\n  }\n\n  // Handle the last key-value pair\n  if (temp) {\n    addResult();\n  }\n  return resultArray.join(',');\n};\n"],"names":[],"mappings":";AAcO,MAAM,WAAA,GAAc,CAAC,KAAA,KAAkB;AAC5C,EAAA,IAAI,UAAU,EAAA,EAAI;AAChB,IAAA,OAAO,KAAA;AAAA,EACT;AACA,EAAA,MAAM,aAAA,GAAgB,kBAAkB,KAAK,CAAA;AAC7C,EAAA,IAAI,aAAA,EAAe;AACjB,IAAA,OAAO,KAAA;AAAA,EACT;AACA,EAAA,OAAO,IAAI,KAAK,CAAA,CAAA,CAAA;AAClB;AAkBO,MAAM,oBAAA,GAAuB,CAAC,KAAA,KAAkB;AACrD,EAAA,MAAM,aAAA,GAAgB,kBAAkB,KAAK,CAAA;AAC7C,EAAA,IAAI,aAAA,EAAe;AACjB,IAAA,OAAO,KAAA;AAAA,EACT;AAEA,EAAA,IAAI,OAAA,GAAU,KAAA;AAEd,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACrC,IAAA,MAAM,IAAA,GAAO,MAAM,CAAC,CAAA;AACpB,IAAA,MAAM,SAAA,GAAY,KAAA,CAAM,WAAA,CAAY,CAAC,CAAA;AAErC,IAAA,IAAI,SAAS,GAAA,EAAK;AAChB,MAAA,OAAA,IAAW,IAAA;AAAA,IACb,WAAW,SAAA,KAAc,KAAA,CAAA,IAAa,iBAAA,CAAkB,IAAA,EAAM,CAAC,CAAA,EAAG;AAChE,MAAA,OAAA,IAAW,IAAA;AAAA,IACb,WAAW,SAAA,KAAc,KAAA,CAAA,IAAa,CAAC,gBAAA,CAAiB,SAAS,CAAA,EAAG;AAClE,MAAA,OAAA,IAAW,QAAA;AAAA,IACb,CAAA,MAAO;AACL,MAAA,OAAA,IAAW,GAAA;AACX,MAAA,OAAA,IAAW,SAAA,CAAU,SAAS,EAAE,CAAA;AAChC,MAAA,OAAA,IAAW,GAAA;AAAA,IACb;AAGA,IAAA,IAAI,SAAA,KAAc,KAAA,CAAA,IAAa,SAAA,GAAY,KAAA,EAAQ;AACjD,MAAA,CAAA,EAAA;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,OAAA;AACT;AAYO,MAAM,iBAAA,GAAoB,CAAC,IAAA,KAA0B;AAC1D,EAAA,IAAI,IAAA,CAAK,WAAW,CAAA,EAAG;AACrB,IAAA,OAAO,KAAA;AAAA,EACT;AAEA,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,IAAA,CAAK,QAAQ,CAAA,EAAA,EAAK;AACpC,IAAA,MAAM,IAAA,GAAO,KAAK,CAAC,CAAA;AACnB,IAAA,IAAI,CAAC,iBAAA,CAAkB,IAAA,EAAM,CAAC,CAAA,EAAG;AAC/B,MAAA,OAAO,KAAA;AAAA,IACT;AAAA,EACF;AAEA,EAAA,OAAO,IAAA;AACT;AAaA,MAAM,iBAAA,GAAoB,CAAC,IAAA,EAAc,KAAA,KAA2B;AAClE,EAAA,MAAM,SAAA,GAAY,IAAA,CAAK,WAAA,CAAY,CAAC,CAAA;AACpC,EAAA,IAAI,cAAc,KAAA,CAAA,EAAW;AAC3B,IAAA,OAAO,KAAA;AAAA,EACT;AAEA,EAAA,OACG,SAAA,IAAa,MAAM,SAAA,IAAa,GAAA;AAAA,EAChC,SAAA,IAAa,MAAM,SAAA,IAAa,EAAA;AAAA,EACjC,SAAA,KAAc,EAAA;AAAA,EACd,SAAA,KAAc,EAAA;AAAA,EACb,SAAA,IAAa,EAAA,IAAM,SAAA,IAAa,EAAA,IAAM,KAAA,GAAQ,CAAA;AAEnD,CAAA;AAQA,MAAM,gBAAA,GAAmB,CAAC,SAAA,KAA+B;AACvD,EAAA,OAAO,SAAA,IAAa,KAAK,SAAA,IAAa,OAAA;AACxC,CAAA;AAYO,MAAM,eAAA,GAAkB,CAAC,SAAA,KAA8B;AAC5D,EAAA,MAAM,cAAwB,EAAC;AAC/B,EAAA,MAAM,aAAA,GAAgB,cAAA;AACtB,EAAA,IAAI,UAAA,GAAa,EAAA;AACjB,EAAA,IAAI,YAAA,GAAe,EAAA;AACnB,EAAA,IAAI,QAAA,GAAW,KAAA;AACf,EAAA,IAAI,IAAA,GAAO,EAAA;AACX,EAAA,MAAM,YAAY,MAAM;AACtB,IAAA,MAAM,aAAA,GAAgB,IAAA,CAAK,KAAA,CAAM,aAAa,CAAA;AAC9C,IAAA,IAAI,aAAA,EAAe;AACjB,MAAA,MAAM,QAAA,GAAW,cAAc,CAAC,CAAA;AAChC,MAAA,CAAC,UAAA,EAAY,YAAY,CAAA,GAAI,IAAA,CAAK,MAAM,QAAQ,CAAA;AAChD,MAAA,WAAA,CAAY,IAAA,CAAK,CAAA,EAAG,WAAA,CAAY,UAAA,CAAW,MAAM,CAAC,CAAA,EAAG,QAAQ,IAAI,YAAA,CAAa,KAAA,CAAM,CAAA,EAAG,CAAA,CAAE,CAAC,CAAA,CAAA,CAAG,CAAA;AAAA,IAC/F;AAAA,EACF,CAAA;AAEA,EAAA,KAAA,MAAW,QAAQ,SAAA,EAAW;AAC5B,IAAA,IAAI,SAAS,GAAA,IAAO,IAAA,CAAK,KAAK,MAAA,GAAS,CAAC,MAAM,IAAA,EAAM;AAElD,MAAA,QAAA,GAAW,CAAC,QAAA;AACZ,MAAA,IAAA,IAAQ,IAAA;AAAA,IACV,CAAA,MAAA,IAAW,IAAA,KAAS,GAAA,IAAO,CAAC,QAAA,EAAU;AAEpC,MAAA,SAAA,EAAU;AACV,MAAA,IAAA,GAAO,EAAA;AAAA,IACT,CAAA,MAAO;AAEL,MAAA,IAAA,IAAQ,IAAA;AAAA,IACV;AAAA,EACF;AAGA,EAAA,IAAI,IAAA,EAAM;AACR,IAAA,SAAA,EAAU;AAAA,EACZ;AACA,EAAA,OAAO,WAAA,CAAY,KAAK,GAAG,CAAA;AAC7B;;;;"}