import { NodeJSBuffer } from './lib/nodejs-buffer.js'

// @NOTE This file is not meant to be exported directly. Public functions are
// re-exported from ./utf8.ts instead. Keeping the implementations in this file
// makes it possible to test both the NodeJS-optimized and the ponyfill
// implementations.

/**
 * UTF-8 byte length of a string, delegated to `NodeJSBuffer.byteLength`.
 *
 * The binding is `null` when the imported `NodeJSBuffer` is falsy, so callers
 * must check for `null` before invoking it.
 */
export const utf8LenNode = NodeJSBuffer
  ? function utf8LenNode(string: string): number {
      return NodeJSBuffer!.byteLength(string, 'utf8')
    }
  : /* v8 ignore next -- @preserve */ null

/**
 * Computes the number of bytes needed to encode `string` as UTF-8 without
 * allocating an intermediate buffer.
 *
 * The counting mirrors what `new TextEncoder().encode(string).byteLength`
 * would report, but avoids the `Uint8Array` allocation and copy that make the
 * `TextEncoder` approach slower.
 *
 * @param string - The string to measure.
 * @returns The UTF-8 byte length. A valid surrogate pair counts as 4 bytes;
 * an unpaired surrogate counts as 3 bytes.
 */
export function utf8LenCompute(string: string): number {
  const unitCount = string.length

  // Every UTF-16 code unit contributes at least one byte; this tracks the
  // bytes required on top of that baseline.
  let extraBytes = 0

  let index = 0
  while (index < unitCount) {
    const unit = string.charCodeAt(index)
    index += 1

    // ASCII: a single byte, already covered by the baseline.
    if (unit <= 0x7f) continue

    // Two-byte sequence: one byte beyond the baseline.
    if (unit <= 0x7ff) {
      extraBytes += 1
      continue
    }

    // Three-byte sequence: two bytes beyond the baseline.
    extraBytes += 2

    // A high surrogate followed by a low surrogate is one 4-byte character.
    // The two extra bytes above plus the baseline byte of each code unit
    // already add up to 4, so the low surrogate only needs to be skipped.
    if (unit >= 0xd800 && unit <= 0xdbff) {
      // `index` already points at the following code unit. Reading past the
      // end yields NaN, which fails both comparisons below.
      const following = string.charCodeAt(index)
      if (following >= 0xdc00 && following <= 0xdfff) {
        index += 1
      }
    }
  }

  return unitCount + extraBytes
}