{"version":3,"sources":["../src/index.ts","../src/core/wordfreq.ts","../src/data/dataPath.ts","../src/data/readCBPack.ts","../src/numbers/index.ts","../src/tokenize/index.ts","../src/data/chinese.ts","../src/utils/languageInfo.ts","../src/utils/transliterate.ts","../src/utils/preprocess.ts","../src/utils/quotes.ts","../src/core/constants.ts","../src/core/frequency.ts"],"sourcesContent":["/**\n * Node.js 版 wordfreq 的公共入口。\n * 需求说明：对外暴露与 Python 版等价的 API，便于现有调用方平滑迁移。\n */\nexport {\n  wordFrequency,\n  zipfFrequency,\n  availableLanguages,\n  iterWordlist,\n  topNList,\n  randomWords,\n  randomAsciiWords,\n  getFrequencyList,\n  getFrequencyDict,\n} from \"./core/wordfreq\";\n\nexport { cBToFreq, cBToZipf, zipfToFreq, freqToZipf } from \"./core/frequency\";\nexport { simpleTokenize, tokenize, lossyTokenize } from \"./tokenize\";\nexport { digitFreq, smashNumbers, hasDigitSequence, benfordFreq, yearFreq } from \"./numbers\";\nexport { getDataDir, getDataPath } from \"./data/dataPath\";\n","import fs from \"node:fs\";\nimport path from \"node:path\";\n\nimport { LRUCache } from \"lru-cache\";\n\nimport { getDataDir } from \"../data/dataPath\";\nimport { readCBPack } from \"../data/readCBPack\";\nimport { digitFreq, hasDigitSequence, smashNumbers } from \"../numbers\";\nimport { lossyTokenize } from \"../tokenize\";\nimport { getLanguageInfo, normalizeLanguageTag } from \"../utils/languageInfo\";\n\nimport {\n  CACHE_SIZE,\n  DEFAULT_WORDLIST,\n  INFERRED_SPACE_FACTOR,\n  CHAR_COMBINATION_PENALTY,\n} from \"./constants\";\nimport { cBToFreq, freqToZipf, zipfToFreq } from \"./frequency\";\n\ntype FrequencyBuckets = string[][];\n\n// 缓存原始频率桶，避免多次解压 msgpack.gz，满足高频查询场景的性能要求。\nconst frequencyListCache = new Map<string, FrequencyBuckets>();\n// 缓存 token->概率 的映射，减少重复转化的 CPU 与内存开销。\nconst frequencyDictCache = new Map<string, Map<string, number>>();\n// 词频查询的 LRU 缓存，与 Python 版的 CACHE_SIZE 一致。\nconst wordFrequencyCache = new LRUCache<string, number>({ max: CACHE_SIZE });\n\nfunction normalizeWordlist(wordlist: string): string {\n  if (wordlist === \"combined\") {\n    return \"small\";\n  }\n  return wordlist;\n}\n\nfunction buildAvailableIndex(): Record<string, Record<string, string>> {\n  const dataDir = getDataDir();\n  const files = fs.readdirSync(dataDir);\n  const index: Record<string, Record<string, string>> = {};\n\n  files\n    .filter((name) => name.endsWith(\".msgpack.gz\") && !name.startsWith(\"_\"))\n    .forEach((name) => {\n      const [listName, lang] = name.replace(\".msgpack.gz\", \"\").split(\"_\");\n      if (!index[listName]) {\n        index[listName] = {};\n      }\n      index[listName][lang] = path.join(dataDir, name);\n    });\n\n  return index;\n}\n\nconst availableIndex = buildAvailableIndex();\n\n/**\n * 返回指定词表下可用的语言与文件路径映射。\n * 需求说明：API 与 Python 版一致，允许传入 best/large/small。\n */\nexport function availableLanguages(wordlist = DEFAULT_WORDLIST): Record<string, string> {\n  const normalized = normalizeWordlist(wordlist);\n  if (normalized === \"best\") {\n    const small = availableLanguages(\"small\");\n    const large = availableLanguages(\"large\");\n    return { ...small, ...large }; // large 优先覆盖 small\n  }\n\n  return { ...(availableIndex[normalized] ?? {}) };\n}\n\nfunction closestLanguage(requested: string, available: string[]): string | null {\n  const normalized = normalizeLanguageTag(requested);\n  if (available.includes(normalized)) {\n    return normalized;\n  }\n\n  const base = normalized.split(/[-_]/)[0];\n  if (available.includes(base)) {\n    return base;\n  }\n\n  const candidate = available.find((lang) => lang.startsWith(base));\n  return candidate ?? null;\n}\n\nexport function getFrequencyList(lang: string, wordlist = DEFAULT_WORDLIST): FrequencyBuckets {\n  const normalizedList = normalizeWordlist(wordlist);\n  const available = availableLanguages(normalizedList);\n  const languages = Object.keys(available);\n  const best = closestLanguage(lang, languages);\n\n  if (!best) {\n    throw new Error(`没有找到语言 ${lang} 的词表 ${normalizedList}`);\n  }\n\n  const cacheKey = `${best}:${normalizedList}`;\n  const cached = frequencyListCache.get(cacheKey);\n  if (cached) {\n    return cached;\n  }\n\n  const filePath = available[best];\n  const buckets = readCBPack(filePath);\n  frequencyListCache.set(cacheKey, buckets);\n  return buckets;\n}\n\n/**\n * 把频率桶转换成 token -> 概率的映射。\n * 注意：这里以 Map 存储，利于快速查找与节省内存。\n */\nexport function getFrequencyDict(lang: string, wordlist = DEFAULT_WORDLIST): Map<string, number> {\n  const normalizedList = normalizeWordlist(wordlist);\n  const cacheKey = `${lang}:${normalizedList}`;\n  const cached = frequencyDictCache.get(cacheKey);\n  if (cached) {\n    return cached;\n  }\n\n  const freqs = new Map<string, number>();\n  const pack = getFrequencyList(lang, normalizedList);\n  pack.forEach((bucket, index) => {\n    const freq = cBToFreq(-index);\n    bucket.forEach((word) => freqs.set(word, freq));\n  });\n\n  frequencyDictCache.set(cacheKey, freqs);\n  return freqs;\n}\n\nexport function* iterWordlist(lang: string, wordlist = DEFAULT_WORDLIST): Generator<string> {\n  const pack = getFrequencyList(lang, wordlist);\n  for (const bucket of pack) {\n    for (const word of bucket) {\n      yield word;\n    }\n  }\n}\n\nfunction roundToSignificantDigits(value: number, minimum: number): number {\n  // 词频只需要 1% 精度，因此按有效数字 3 位四舍五入，避免暴露无意义的小数噪声。\n  const unrounded = Math.max(value, minimum);\n  if (unrounded === 0) {\n    return 0;\n  }\n  const leadingZeroes = Math.floor(-Math.log10(unrounded));\n  const places = Math.max(0, leadingZeroes + 3);\n  const factor = 10 ** places;\n  return Math.round(unrounded * factor) / factor;\n}\n\n/**\n * 尝试用单字词频估算整词的词频（仅适用于中文）。\n * 需求说明：当 jieba 把词作为整词处理，但该词不在 wordfreq 词库中时，\n * 通过拆分为单字并组合它们的词频来给出一个合理的估算值。\n *\n * @param word - 要估算的词（已经过 smashNumbers 处理）\n * @param freqs - 词频字典\n * @returns 估算的词频概率，如果有单字也找不到则返回 null\n */\nfunction estimateChineseWordFrequency(\n  word: string,\n  freqs: Map<string, number>,\n): number | null {\n  // 单字不需要 fallback，直接返回 null 表示确实找不到\n  if (word.length <= 1) {\n    return null;\n  }\n\n  const chars = [...word];\n  let oneOverResult = 0;\n\n  for (const char of chars) {\n    const freq = freqs.get(char);\n    if (freq === undefined) {\n      // 有单字也找不到，放弃估算\n      return null;\n    }\n    oneOverResult += 1 / freq;\n  }\n\n  // 使用调和平均计算基础词频，再应用惩罚因子\n  // 惩罚因子：组合词的实际频率通常远低于单字频率的简单组合\n  const baseFreq = 1 / oneOverResult;\n  const penalty = CHAR_COMBINATION_PENALTY ** (chars.length - 1);\n  return baseFreq / penalty;\n}\n\n/**\n * 计算指定词在给定语言与词表下的概率（0~1）。\n * 逻辑基于 Python 版：分词 -> smash 数字 -> 频率合并 -> Jieba 补偿 -> 三位有效数字。\n *\n * 中文增强：当整词不在词库中时，会尝试用单字词频估算，提高覆盖率。\n */\nexport function wordFrequency(\n  word: string,\n  lang: string,\n  wordlist = DEFAULT_WORDLIST,\n  minimum = 0,\n): number {\n  const cacheKey = `${word}|${lang}|${wordlist}|${minimum}`;\n  const cached = wordFrequencyCache.get(cacheKey);\n  if (cached !== undefined) {\n    return cached;\n  }\n\n  const info = getLanguageInfo(lang);\n  const tokens = lossyTokenize(word, lang);\n  if (tokens.length === 0) {\n    wordFrequencyCache.set(cacheKey, minimum);\n    return minimum;\n  }\n\n  const freqs = getFrequencyDict(lang, wordlist);\n  const isChineseTokenizer = info.tokenizer === \"jieba\";\n  let oneOverResult = 0;\n\n  for (const token of tokens) {\n    const smashed = smashNumbers(token);\n    let freq = freqs.get(smashed);\n\n    // 如果词库中找不到该 token\n    if (freq === undefined) {\n      // 对于中文，尝试用单字词频估算\n      if (isChineseTokenizer) {\n        const estimated = estimateChineseWordFrequency(smashed, freqs);\n        if (estimated !== null) {\n          freq = estimated;\n        }\n      }\n\n      // 仍然找不到，返回 minimum\n      if (freq === undefined) {\n        wordFrequencyCache.set(cacheKey, minimum);\n        return minimum;\n      }\n    }\n\n    // 多 token 频率合并使用 1/f 累加的方式，与原版保持一致，可避免被单个低频词放大影响。\n    const adjusted = smashed === token ? freq : freq * digitFreq(token);\n    oneOverResult += 1 / adjusted;\n  }\n\n  let result = 1 / oneOverResult;\n  if (isChineseTokenizer && tokens.length > 1) {\n    result *= INFERRED_SPACE_FACTOR ** -(tokens.length - 1);\n  }\n\n  result = roundToSignificantDigits(result, minimum);\n  wordFrequencyCache.set(cacheKey, result);\n  return result;\n}\n\nexport function zipfFrequency(\n  word: string,\n  lang: string,\n  wordlist = DEFAULT_WORDLIST,\n  minimum = 0,\n): number {\n  const freqMin = zipfToFreq(minimum);\n  const freq = wordFrequency(word, lang, wordlist, freqMin);\n  return Number(freqToZipf(freq).toFixed(2));\n}\n\nexport function topNList(\n  lang: string,\n  n: number,\n  wordlist = DEFAULT_WORDLIST,\n  asciiOnly = false,\n): string[] {\n  const results: string[] = [];\n  for (const word of iterWordlist(lang, wordlist)) {\n    if (!asciiOnly || word <= \"~\") {\n      if (!hasDigitSequence(word)) {\n        results.push(word);\n        if (results.length >= n) {\n          break;\n        }\n      }\n    }\n  }\n  return results;\n}\n\nexport function randomWords(\n  lang = \"en\",\n  wordlist = DEFAULT_WORDLIST,\n  nwords = 5,\n  bitsPerWord = 12,\n  asciiOnly = false,\n): string {\n  const nChoices = 2 ** bitsPerWord;\n  const choices = topNList(lang, nChoices, wordlist, asciiOnly);\n  if (choices.length < nChoices) {\n    throw new Error(`词表不足以提供 ${bitsPerWord} bits 的熵值`);\n  }\n  const tokens: string[] = [];\n  for (let i = 0; i < nwords; i += 1) {\n    const idx = Math.floor(Math.random() * nChoices);\n    tokens.push(choices[idx]);\n  }\n  return tokens.join(\" \");\n}\n\nexport function randomAsciiWords(\n  lang = \"en\",\n  wordlist = DEFAULT_WORDLIST,\n  nwords = 5,\n  bitsPerWord = 12,\n): string {\n  return randomWords(lang, wordlist, nwords, bitsPerWord, true);\n}\n","import fs from \"node:fs\";\nimport path from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\n/**\n * 数据目录的环境变量名称。\n * 提供该变量是为了支持用户在不同部署环境下自定义词频数据位置，满足“可配置路径”这一需求。\n */\nexport const DATA_ENV_KEY = \"WORDFREQ_DATA\";\n\n/**\n * 解析出当前编译后文件所在目录，再向上回退到项目根目录，拼出默认的数据目录。\n * 这样设计是为了兼顾 ESM 与 CJS 输出：不依赖 __dirname，改用 import.meta.url 提供的绝对定位。\n */\nconst moduleDir =\n  typeof __dirname !== \"undefined\"\n    ? __dirname\n    : path.dirname(fileURLToPath(import.meta.url));\n\n// 说明：在 CJS 中 __dirname 本身已是当前文件所在目录，Bun 的 ESM 也会注入 __dirname。\n// 需求：定位到打包输出的 dist 目录，再回退到 data。若对 __dirname 再调一次 dirname 会误跳到上一级 node_modules，导致找不到内置数据。\n// 兼容源码运行场景（vitest 等），额外增加 ../../data 作为候选，确保 tests 从 src 入口跑时也能找到根级 data。\nconst defaultDataDir = path.resolve(moduleDir, \"../data\");\nconst sourceDataDir = path.resolve(moduleDir, \"../../data\");\n\n/**\n * 返回词频数据所在的目录。\n * 优先使用用户指定的环境变量，未指定时退回到包内自带的 data 目录。\n */\nexport function getDataDir(): string {\n  const override = process.env[DATA_ENV_KEY];\n  const candidates = [\n    override && override.trim().length > 0 ? path.resolve(override) : null,\n    sourceDataDir,\n    defaultDataDir,\n    path.resolve(process.cwd(), \"node_modules\", \"nodewordfreq\", \"data\"),\n    path.resolve(process.cwd(), \"data\"),\n  ].filter((p): p is string => Boolean(p));\n\n  for (const dir of candidates) {\n    if (fs.existsSync(dir)) {\n      return dir;\n    }\n  }\n\n  throw new Error(\n    `无法找到词频数据目录，请设置 ${DATA_ENV_KEY} 指向含有 msgpack.gz 数据的路径`,\n  );\n}\n\n/**\n * 拼接出具体数据文件的绝对路径，便于其他模块统一获取数据。\n */\nexport function getDataPath(filename: string): string {\n  return path.resolve(getDataDir(), filename);\n}\n","import fs from \"node:fs\";\nimport { gunzipSync } from \"node:zlib\";\n\nimport { Unpackr } from \"msgpackr\";\n\n/**\n * cBpack 解析器，用来把压缩的 msgpack 词频文件加载成内存结构。\n * 采用同步读取是为了与 Python 版一致的阻塞式 API，方便在计算词频的调用链里直接使用。\n * 如果需要流式读取以降低内存占用，可以在后续迭代中再扩展异步版本。\n */\nexport function readCBPack(filePath: string): string[][] {\n  const packed = fs.readFileSync(filePath);\n  const uncompressed = gunzipSync(packed);\n\n  const unpackr = new Unpackr({ useRecords: false });\n  const decoded = unpackr.unpack(uncompressed) as unknown[];\n  const [header, ...buckets] = decoded as [Record<string, unknown>, ...string[][]];\n\n  if (\n    !header ||\n    typeof header !== \"object\" ||\n    (header as Record<string, unknown>).format !== \"cB\" ||\n    (header as Record<string, unknown>).version !== 1\n  ) {\n    throw new Error(`非法的 cBpack 头部: ${JSON.stringify(header)}`);\n  }\n\n  return buckets;\n}\n","/**\n * 数字形态相关的概率估算逻辑。\n * 需求说明：为了避免为每个具体数字存储词频，原版通过将多位数字归一化为形态（例如 1234 -> 0000），\n * 再用 Benford 分布 + 年份分布估算具体数字的概率。本文件完整复刻该逻辑。\n */\n\n// Benford 分布（含前导零的估算），索引即首位数字。\nexport const DIGIT_FREQS = [0.009, 0.3, 0.175, 0.124, 0.096, 0.078, 0.066, 0.057, 0.05, 0.045];\n\n// 年份分布曲线的参数，来源于原版实验数据。\nconst YEAR_LOG_PEAK = -1.9185;\nconst NOT_YEAR_PROB = 0.1;\nconst REFERENCE_YEAR = 2019;\nconst PLATEAU_WIDTH = 20;\n\nconst DIGIT_RE = /\\d/g;\nconst MULTI_DIGIT_RE_GLOBAL = /\\d[\\d.,]+/g;\nconst MULTI_DIGIT_RE = /\\d[\\d.,]+/;\nconst PURE_DIGIT_RE_GLOBAL = /\\d+/g;\n\nexport function benfordFreq(text: string): number {\n  const firstDigit = Number.parseInt(text[0] ?? \"0\", 10);\n  return DIGIT_FREQS[firstDigit] / 10 ** (text.length - 1);\n}\n\nexport function yearFreq(text: string): number {\n  const year = Number.parseInt(text, 10);\n\n  let yearLogFreq = YEAR_LOG_PEAK;\n  if (year <= REFERENCE_YEAR) {\n    yearLogFreq = YEAR_LOG_PEAK - 0.0083 * (REFERENCE_YEAR - year);\n  } else if (year <= REFERENCE_YEAR + PLATEAU_WIDTH) {\n    yearLogFreq = YEAR_LOG_PEAK;\n  } else {\n    yearLogFreq = YEAR_LOG_PEAK - 0.2 * (year - (REFERENCE_YEAR + PLATEAU_WIDTH));\n  }\n\n  const yearProb = 10 ** yearLogFreq;\n  const notYearProb = NOT_YEAR_PROB * benfordFreq(text);\n  return yearProb + notYearProb;\n}\n\n/**\n * 返回数字字符串的相对频率估计值。\n * 在包含多段数字的 token（例如 12,345.67）中，会拆分每一段再乘积概率。\n */\nexport function digitFreq(text: string): number {\n  let freq = 1;\n  const matches = text.matchAll(MULTI_DIGIT_RE_GLOBAL);\n  for (const match of matches) {\n    const sub = match[0];\n    const pureDigits = sub.matchAll(PURE_DIGIT_RE_GLOBAL);\n    for (const digitMatch of pureDigits) {\n      const digits = digitMatch[0];\n      if (digits.length === 4) {\n        freq *= yearFreq(digits);\n      } else {\n        freq *= benfordFreq(digits);\n      }\n    }\n  }\n  return freq;\n}\n\n/**\n * 判断 token 中是否包含需要归一化处理的多位数字。\n */\nexport function hasDigitSequence(text: string): boolean {\n  return MULTI_DIGIT_RE.test(text);\n}\n\n/**\n * 将多位数字归一化为 0 占位形态。\n * 需求说明：这样做能在词表中以形态代表大量数字，节省存储空间并保持概率可估计。\n */\nexport function smashNumbers(text: string): string {\n  return text.replace(MULTI_DIGIT_RE_GLOBAL, (match) => match.replace(DIGIT_RE, \"0\"));\n}\n","import { createRequire } from \"node:module\";\n\nimport { simplifyChinese, getJiebaMainDictPath, getJiebaOrigDictPath } from \"../data/chinese\";\nimport { getLanguageInfo, TokenizerKind } from \"../utils/languageInfo\";\nimport { preprocessText } from \"../utils/preprocess\";\nimport { uncurlQuotes } from \"../utils/quotes\";\n\nconst PUNCT_RE = /[\\p{P}\\p{S}]+/u;\n\nconst segmenterCache = new Map<string, Intl.Segmenter>();\nconst requireFromEsm = typeof __dirname !== \"undefined\"\n  ? createRequire(`${__dirname}/`)\n  : createRequire(import.meta.url);\n\nlet nodeJieba: typeof import(\"nodejieba\") | null = null;\nlet jiebaMainLoaded = false;\nlet jiebaOrigLoaded = false;\n\nfunction loadNodeJieba(externalWordlist: boolean): typeof import(\"nodejieba\") | null {\n  let jieba = nodeJieba;\n  if (!jieba) {\n    try {\n      // 在 ESM 环境下需通过 createRequire 动态加载 CommonJS 包。\n      // 若未安装可选依赖，则捕获异常并回退至通用分词。\n      jieba = requireFromEsm(\"nodejieba\");\n      nodeJieba = jieba;\n    } catch {\n      return null;\n    }\n  }\n\n  if (!jieba) {\n    return null;\n  }\n\n  if (externalWordlist && !jiebaOrigLoaded) {\n    // 使用原版 jieba 词典（含词性标签），适合外部通用分词场景。\n    jieba.load({ dict: getJiebaOrigDictPath() });\n    jiebaOrigLoaded = true;\n  } else if (!externalWordlist && !jiebaMainLoaded) {\n    // wordfreq 的定制词典只有两列（词 + 频率），将其作为 userDict 叠加在内置主词典上，\n    // 避免触发 nodejieba 对主词典三列格式的校验错误。\n    jieba.load({ userDict: getJiebaMainDictPath() });\n    jiebaMainLoaded = true;\n  }\n\n  return jieba;\n}\n\nfunction getSegmenter(lang?: string): Intl.Segmenter {\n  const key = lang ?? \"und\";\n  const cached = segmenterCache.get(key);\n  if (cached) {\n    return cached;\n  }\n\n  const segmenter = new Intl.Segmenter(lang, { granularity: \"word\" });\n  segmenterCache.set(key, segmenter);\n  return segmenter;\n}\n\nfunction segmentWithIntl(text: string, lang?: string, includePunctuation = false): string[] {\n  const segmenter = getSegmenter(lang);\n  const tokens: string[] = [];\n\n  for (const item of segmenter.segment(text)) {\n    const segment = item.segment.trim();\n    if (!segment) {\n      continue;\n    }\n    if (!includePunctuation && item.isWordLike === false) {\n      continue;\n    }\n    tokens.push(segment.toLocaleLowerCase());\n  }\n\n  return tokens;\n}\n\nfunction tokenizeWithOptionalJieba(\n  text: string,\n  includePunctuation: boolean,\n  externalWordlist: boolean,\n): string[] {\n  const jieba = loadNodeJieba(externalWordlist);\n  if (!jieba) {\n    // 未安装可选依赖时，退回到 Intl 分词，保证 API 可用。\n    return segmentWithIntl(text, \"zh\", includePunctuation);\n  }\n\n  const result: string[] = externalWordlist ? jieba.cut(text, false) : jieba.cut(text, false);\n  const lowered = result.map((token) => token.toLocaleLowerCase(\"zh\"));\n  if (includePunctuation) {\n    return lowered;\n  }\n  return lowered.filter((token) => !PUNCT_RE.test(token));\n}\n\nfunction tokenizeByKind(\n  text: string,\n  tokenizer: TokenizerKind,\n  includePunctuation: boolean,\n  externalWordlist: boolean,\n  lang: string,\n): string[] {\n  if (tokenizer === \"jieba\") {\n    return tokenizeWithOptionalJieba(text, includePunctuation, externalWordlist);\n  }\n\n  if (tokenizer === \"mecab\") {\n    // Node 侧没有默认内置的 MeCab/kuromoji，这里使用 Intl.Segmenter 做兜底，\n    // 保证至少能按 Unicode 词边界拆分。\n    return segmentWithIntl(text, lang, includePunctuation);\n  }\n\n  return segmentWithIntl(text, lang, includePunctuation);\n}\n\n/**\n * 简单分词，直接依赖 Intl.Segmenter。\n * 需求说明：提供无语言上下文的轻量分词能力，兼容包含标点与否的两种场景。\n */\nexport function simpleTokenize(text: string, includePunctuation = false): string[] {\n  const normalized = text.normalize(\"NFC\");\n  return segmentWithIntl(normalized, undefined, includePunctuation);\n}\n\n/**\n * 针对指定语言进行分词，包含预处理与语言定制化的选择。\n * - 会先执行 preprocessText 统一大小写、转写等操作，确保与词表一致。\n * - CJK 场景优先尝试 nodejieba（可选依赖），缺失时退回 Intl。\n */\nexport function tokenize(\n  text: string,\n  lang: string,\n  includePunctuation = false,\n  externalWordlist = false,\n): string[] {\n  const info = getLanguageInfo(lang);\n  const normalized = preprocessText(text, lang);\n  return tokenizeByKind(normalized, info.tokenizer, includePunctuation, externalWordlist, lang);\n}\n\n/**\n * “损失性”分词，会在 tokenize 结果基础上做进一步归一化：\n * - 中文强制简体化，保持与词表键一致。\n * - 弯引号统一为直引号。\n */\nexport function lossyTokenize(\n  text: string,\n  lang: string,\n  includePunctuation = false,\n  externalWordlist = false,\n): string[] {\n  const info = getLanguageInfo(lang);\n  let tokens = tokenize(text, lang, includePunctuation, externalWordlist);\n\n  if (info.lookupTransliteration === \"zh-Hans\") {\n    tokens = tokens.map((token) => simplifyChinese(token));\n  }\n\n  return tokens.map((token) => uncurlQuotes(token));\n}\n","import fs from \"node:fs\";\nimport { gunzipSync } from \"node:zlib\";\n\nimport { Unpackr } from \"msgpackr\";\n\nimport { getDataPath } from \"./dataPath\";\n\ntype SimplifiedMap = Record<string, string>;\n\nlet cachedSimplifiedMap: SimplifiedMap | null = null;\n\n/**\n * 加载简繁转换表。\n * 数据源与 Python 版一致，存储在 `_chinese_mapping.msgpack.gz` 内。\n * 这里使用懒加载 + 缓存，避免每次 token 转换都重新解压，满足性能与启动延迟的折中需求。\n */\nfunction loadSimplifiedMap(): SimplifiedMap {\n  if (cachedSimplifiedMap) {\n    return cachedSimplifiedMap;\n  }\n\n  const buffer = fs.readFileSync(getDataPath(\"_chinese_mapping.msgpack.gz\"));\n  const uncompressed = gunzipSync(buffer);\n  const unpackr = new Unpackr({ useRecords: false });\n  const rawMap = unpackr.unpack(uncompressed) as Record<string, string>;\n  const mapping: SimplifiedMap = {};\n  for (const [codepoint, target] of Object.entries(rawMap)) {\n    const char = String.fromCodePoint(Number.parseInt(codepoint, 10));\n    mapping[char] = target;\n  }\n\n  cachedSimplifiedMap = mapping;\n  return mapping;\n}\n\n/**\n * 将文本逐字映射为简体，用于词频查找的归一化。\n * 需求说明：Python 版在 lossy_tokenize 中会强制把中文转换为简体，\n * 这样才能命中统一的词频表。这里保持相同行为。\n */\nexport function simplifyChinese(text: string): string {\n  const mapping = loadSimplifiedMap();\n  const simplified = Array.from(text, (char) => mapping[char] ?? char).join(\"\");\n  return simplified.toLocaleLowerCase(\"zh\");\n}\n\nexport function getJiebaMainDictPath(): string {\n  return getDataPath(\"jieba_zh.txt\");\n}\n\nexport function getJiebaOrigDictPath(): string {\n  return getDataPath(\"jieba_zh_orig.txt\");\n}\n","export type TokenizerKind = \"regex\" | \"jieba\" | \"mecab\" | null;\n\nexport interface LanguageInfo {\n  script: string | undefined;\n  tokenizer: TokenizerKind;\n  normalForm: \"NFC\" | \"NFKC\";\n  removeMarks: boolean;\n  dotlessI: boolean;\n  diacriticsUnder: \"cedillas\" | \"commas\" | null;\n  transliteration: string | null;\n  lookupTransliteration: string | null;\n}\n\n/**\n * 无空格书写的文字脚本列表。\n * 这些脚本如果用通用正则分词，会被拆成单字，严重影响效果，因此需要特殊对待。\n */\nexport const SPACELESS_SCRIPTS = [\n  \"Hira\",\n  \"Kana\",\n  \"Thai\",\n  \"Khmr\",\n  \"Laoo\",\n  \"Mymr\",\n  \"Tale\",\n  \"Talu\",\n  \"Lana\",\n];\n\nexport const EXTRA_JAPANESE_CHARACTERS = \"ー々〻〆\";\n\nfunction normalizeLocale(tag: string): Intl.Locale | null {\n  const safeTag = tag.replace(/_/g, \"-\");\n  try {\n    return new Intl.Locale(safeTag);\n  } catch {\n    return null;\n  }\n}\n\n/**\n * 统一语言标签格式，便于缓存命中与路径选择。\n */\nexport function normalizeLanguageTag(tag: string): string {\n  const locale = normalizeLocale(tag);\n  if (!locale) {\n    return tag.toLowerCase();\n  }\n  return locale.toString().toLowerCase();\n}\n\nfunction isLanguageInList(language: Intl.Locale, targets: string[]): boolean {\n  return targets.some((target) => {\n    const targetLocale = normalizeLocale(target);\n    return targetLocale?.language === language.language;\n  });\n}\n\n/**\n * 返回指定语言的分词与归一化设定。\n * 设计目的：集中管理各语言的脚本信息和特殊处理逻辑，避免分散到多个模块导致耦合。\n */\nexport function getLanguageInfo(tag: string): LanguageInfo {\n  const locale = normalizeLocale(tag) ?? new Intl.Locale(\"und\");\n  const maximized = locale.maximize();\n\n  const info: LanguageInfo = {\n    script: maximized.script,\n    tokenizer: \"regex\",\n    normalForm: \"NFKC\",\n    removeMarks: false,\n    dotlessI: false,\n    diacriticsUnder: null,\n    transliteration: null,\n    lookupTransliteration: null,\n  };\n\n  if (isLanguageInList(locale, [\"ja\", \"ko\"])) {\n    info.tokenizer = \"mecab\";\n  } else if (isLanguageInList(locale, [\"zh\", \"yue\"])) {\n    info.tokenizer = \"jieba\";\n  } else if (info.script && SPACELESS_SCRIPTS.includes(info.script)) {\n    info.tokenizer = null;\n  }\n\n  if (info.script && [\"Latn\", \"Grek\", \"Cyrl\"].includes(info.script)) {\n    info.normalForm = \"NFC\";\n  }\n\n  if (info.script && [\"Arab\", \"Hebr\"].includes(info.script)) {\n    info.removeMarks = true;\n  }\n\n  if (isLanguageInList(locale, [\"tr\", \"az\", \"kk\"])) {\n    info.dotlessI = true;\n    info.diacriticsUnder = \"cedillas\";\n  } else if (isLanguageInList(locale, [\"ro\"])) {\n    info.diacriticsUnder = \"commas\";\n  }\n\n  if (isLanguageInList(locale, [\"sr\"])) {\n    info.transliteration = \"sr-Latn\";\n  } else if (isLanguageInList(locale, [\"az\"])) {\n    info.transliteration = \"az-Latn\";\n  }\n\n  if (locale.language === \"zh\" && maximized.script !== \"Hant\") {\n    info.lookupTransliteration = \"zh-Hans\";\n  }\n\n  return info;\n}\n","/**\n * 简单的字符级转写实现，用于满足 Serbian/Azerbaijani 的罗马化需求。\n * 说明：这里实现的是最小可用的映射表，覆盖常见的西里尔字符；\n * 如果后续需要更完整的学术转写，可在此基础上扩展。\n */\nconst CYRILLIC_TO_LATIN: Record<string, string> = {\n  а: \"a\",\n  б: \"b\",\n  в: \"v\",\n  г: \"g\",\n  д: \"d\",\n  ђ: \"đ\",\n  е: \"e\",\n  ж: \"ž\",\n  з: \"z\",\n  и: \"i\",\n  ј: \"j\",\n  к: \"k\",\n  л: \"l\",\n  љ: \"lj\",\n  м: \"m\",\n  н: \"n\",\n  њ: \"nj\",\n  о: \"o\",\n  п: \"p\",\n  р: \"r\",\n  с: \"s\",\n  т: \"t\",\n  ћ: \"ć\",\n  у: \"u\",\n  ф: \"f\",\n  х: \"h\",\n  ц: \"c\",\n  ч: \"č\",\n  џ: \"dž\",\n  ш: \"š\",\n};\n\nfunction preserveCase(source: string, target: string): string {\n  if (source.toUpperCase() === source) {\n    return target.toUpperCase();\n  }\n  if (source[0] === source[0]?.toUpperCase()) {\n    return target.charAt(0).toUpperCase() + target.slice(1);\n  }\n  return target;\n}\n\nexport function transliterate(transliteration: string, text: string): string {\n  if (transliteration !== \"sr-Latn\" && transliteration !== \"az-Latn\") {\n    return text;\n  }\n\n  return Array.from(text)\n    .map((char) => {\n      const lower = char.toLowerCase();\n      const mapped = CYRILLIC_TO_LATIN[lower];\n      if (!mapped) {\n        return char;\n      }\n      return preserveCase(char, mapped);\n    })\n    .join(\"\");\n}\n","import { getLanguageInfo } from \"./languageInfo\";\nimport { transliterate } from \"./transliterate\";\n\nconst MARK_RE = /\\p{Mn}|\\u0640/gu;\n\nfunction removeMarks(text: string): string {\n  return text.replace(MARK_RE, \"\");\n}\n\nfunction casefoldWithIDots(text: string): string {\n  const normalized = text.normalize(\"NFC\").replace(/İ/g, \"i\").replace(/I/g, \"ı\");\n  return normalized.toLocaleLowerCase(\"tr\");\n}\n\nfunction commasToCedillas(text: string): string {\n  return text.replace(/\\u0219/g, \"\\u015f\").replace(/\\u021b/g, \"\\u0163\");\n}\n\nfunction cedillasToCommas(text: string): string {\n  return text.replace(/\\u015f/g, \"\\u0219\").replace(/\\u0163/g, \"\\u021b\");\n}\n\n/**\n * 归一化文本，以便后续分词与词频查找。\n * 需求说明：与 Python 版保持一致的步骤顺序，确保跨语言输入都能映射到同一词表。\n */\nexport function preprocessText(text: string, languageTag: string): string {\n  const info = getLanguageInfo(languageTag);\n  let normalized = text.normalize(info.normalForm);\n\n  if (info.transliteration) {\n    normalized = transliterate(info.transliteration, normalized);\n  }\n\n  if (info.removeMarks) {\n    normalized = removeMarks(normalized);\n  }\n\n  normalized = info.dotlessI ? casefoldWithIDots(normalized) : normalized.toLocaleLowerCase();\n\n  if (info.diacriticsUnder === \"commas\") {\n    normalized = cedillasToCommas(normalized);\n  } else if (info.diacriticsUnder === \"cedillas\") {\n    normalized = commasToCedillas(normalized);\n  }\n\n  return normalized;\n}\n","/**\n * 将各种弯引号、全角引号统一成半角直引号，便于与词表匹配。\n * 需求说明：原版在 lossy_tokenize 中会调用 ftfy 的 uncurl_quotes，这里保持等效处理。\n */\nexport function uncurlQuotes(text: string): string {\n  return text.replace(/[\\u2018\\u2019\\u2032]/g, \"'\").replace(/[\\u201C\\u201D\\u2033]/g, '\"');\n}\n","/**\n * 缓存与概率计算用到的核心常量。\n * 这些值直接来源于原始 Python 版本的行为，保持一致才能让词频计算结果对齐。\n */\nexport const CACHE_SIZE = 100000;\n\n/**\n * 在中文分词中，遇到推断出来的分词边界需要衰减概率。\n * 经验值为 10，表示每多一个推断出的空格，频率缩小 10 倍，避免分词把不存在的词拼出来。\n */\nexport const INFERRED_SPACE_FACTOR = 10;\n\n/**\n * 默认使用的词表名称。与 Python 版保持相同语义：优先 large，不存在则退回 small。\n */\nexport const DEFAULT_WORDLIST = \"best\";\n\n/**\n * 单字组合词频的惩罚因子。\n * 当整词不在词库中时，会尝试用单字词频估算，但组合词的实际频率通常低于单字频率的简单组合，\n * 因此需要额外的惩罚因子。值越大，估算出的词频越低。\n * 经验值为 3，表示每多一个字，额外衰减 3 倍。\n */\nexport const CHAR_COMBINATION_PENALTY = 3;\n","/**\n * 频率与 Zipf/centibel 之间的转换函数。\n * 需求说明：Node 版需与 Python 版保持同样的刻度换算，避免词频结果出现系统性偏差。\n */\nexport function cBToFreq(cB: number): number {\n  if (cB > 0) {\n    throw new Error(\"频率不可能是正的分贝数值\");\n  }\n  return 10 ** (cB / 100);\n}\n\nexport function cBToZipf(cB: number): number {\n  return (cB + 900) / 100;\n}\n\nexport function zipfToFreq(zipf: number): number {\n  return 10 ** zipf / 1e9;\n}\n\nexport function freqToZipf(freq: number): number {\n  return Math.log10(freq) + 9;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,IAAAA,kBAAe;AACf,IAAAC,oBAAiB;AAEjB,uBAAyB;;;ACHzB,qBAAe;AACf,uBAAiB;AACjB,sBAA8B;AAF9B;AAQO,IAAM,eAAe;AAM5B,IAAM,YACJ,OAAO,cAAc,cACjB,YACA,iBAAAC,QAAK,YAAQ,+BAAc,YAAY,GAAG,CAAC;AAKjD,IAAM,iBAAiB,iBAAAA,QAAK,QAAQ,WAAW,SAAS;AACxD,IAAM,gBAAgB,iBAAAA,QAAK,QAAQ,WAAW,YAAY;AAMnD,SAAS,aAAqB;AACnC,QAAM,WAAW,QAAQ,IAAI,YAAY;AACzC,QAAM,aAAa;AAAA,IACjB,YAAY,SAAS,KAAK,EAAE,SAAS,IAAI,iBAAAA,QAAK,QAAQ,QAAQ,IAAI;AAAA,IAClE;AAAA,IACA;AAAA,IACA,iBAAAA,QAAK,QAAQ,QAAQ,IAAI,GAAG,gBAAgB,gBAAgB,MAAM;AAAA,IAClE,iBAAAA,QAAK,QAAQ,QAAQ,IAAI,GAAG,MAAM;AAAA,EACpC,EAAE,OAAO,CAAC,MAAmB,QAAQ,CAAC,CAAC;AAEvC,aAAW,OAAO,YAAY;AAC5B,QAAI,eAAAC,QAAG,WAAW,GAAG,GAAG;AACtB,aAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,IAAI;AAAA,IACR,wFAAkB,YAAY;AAAA,EAChC;AACF;AAKO,SAAS,YAAY,UAA0B;AACpD,SAAO,iBAAAD,QAAK,QAAQ,WAAW,GAAG,QAAQ;AAC5C;;;ACvDA,IAAAE,kBAAe;AACf,uBAA2B;AAE3B,sBAAwB;AAOjB,SAAS,WAAW,UAA8B;AACvD,QAAM,SAAS,gBAAAC,QAAG,aAAa,QAAQ;AACvC,QAAM,mBAAe,6BAAW,MAAM;AAEtC,QAAM,UAAU,IAAI,wBAAQ,EAAE,YAAY,MAAM,CAAC;AACjD,QAAM,UAAU,QAAQ,OAAO,YAAY;AAC3C,QAAM,CAAC,QAAQ,GAAG,OAAO,IAAI;AAE7B,MACE,CAAC,UACD,OAAO,WAAW,YACjB,OAAmC,WAAW,QAC9C,OAAmC,YAAY,GAChD;AACA,UAAM,IAAI,MAAM,2CAAkB,KAAK,UAAU,MAAM,CAAC,EAAE;AAAA,EAC5D;AAEA,SAAO;AACT;;;ACrBO,IAAM,cAAc,CAAC,MAAO,KAAK,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,MAAM,KAAK;AAG7F,IAAM,gBAAgB;AACtB,IAAM,gBAAgB;AACtB,IAAM,iBAAiB;AACvB,IAAM,gBAAgB;AAEtB,IAAM,WAAW;AACjB,IAAM,wBAAwB;AAC9B,IAAM,iBAAiB;AACvB,IAAM,uBAAuB;AAEtB,SAAS,YAAY,MAAsB;AAChD,QAAM,aAAa,OAAO,SAAS,KAAK,CAAC,KAAK,KAAK,EAAE;AACrD,SAAO,YAAY,UAAU,IAAI,OAAO,KAAK,SAAS;AACxD;AAEO,SAAS,SAAS,MAAsB;AAC7C,QAAM,OAAO,OAAO,SAAS,MAAM,EAAE;AAErC,MAAI,cAAc;AAClB,MAAI,QAAQ,gBAAgB;AAC1B,kBAAc,gBAAgB,SAAU,iBAAiB;AAAA,EAC3D,WAAW,QAAQ,iBAAiB,eAAe;AACjD,kBAAc;AAAA,EAChB,OAAO;AACL,kBAAc,gBAAgB,OAAO,QAAQ,iBAAiB;AAAA,EAChE;AAEA,QAAM,WAAW,MAAM;AACvB,QAAM,cAAc,gBAAgB,YAAY,IAAI;AACpD,SAAO,WAAW;AACpB;AAMO,SAAS,UAAU,MAAsB;AAC9C,MAAI,OAAO;AACX,QAAM,UAAU,KAAK,SAAS,qBAAqB;AACnD,aAAW,SAAS,SAAS;AAC3B,UAAM,MAAM,MAAM,CAAC;AACnB,UAAM,aAAa,IAAI,SAAS,oBAAoB;AACpD,eAAW,cAAc,YAAY;AACnC,YAAM,SAAS,WAAW,CAAC;AAC3B,UAAI,OAAO,WAAW,GAAG;AACvB,gBAAQ,SAAS,MAAM;AAAA,MACzB,OAAO;AACL,gBAAQ,YAAY,MAAM;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAKO,SAAS,iBAAiB,MAAuB;AACtD,SAAO,eAAe,KAAK,IAAI;AACjC;AAMO,SAAS,aAAa,MAAsB;AACjD,SAAO,KAAK,QAAQ,uBAAuB,CAAC,UAAU,MAAM,QAAQ,UAAU,GAAG,CAAC;AACpF;;;AC7EA,yBAA8B;;;ACA9B,IAAAC,kBAAe;AACf,IAAAC,oBAA2B;AAE3B,IAAAC,mBAAwB;AAMxB,IAAI,sBAA4C;AAOhD,SAAS,oBAAmC;AAC1C,MAAI,qBAAqB;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,gBAAAC,QAAG,aAAa,YAAY,6BAA6B,CAAC;AACzE,QAAM,mBAAe,8BAAW,MAAM;AACtC,QAAM,UAAU,IAAI,yBAAQ,EAAE,YAAY,MAAM,CAAC;AACjD,QAAM,SAAS,QAAQ,OAAO,YAAY;AAC1C,QAAM,UAAyB,CAAC;AAChC,aAAW,CAAC,WAAW,MAAM,KAAK,OAAO,QAAQ,MAAM,GAAG;AACxD,UAAM,OAAO,OAAO,cAAc,OAAO,SAAS,WAAW,EAAE,CAAC;AAChE,YAAQ,IAAI,IAAI;AAAA,EAClB;AAEA,wBAAsB;AACtB,SAAO;AACT;AAOO,SAAS,gBAAgB,MAAsB;AACpD,QAAM,UAAU,kBAAkB;AAClC,QAAM,aAAa,MAAM,KAAK,MAAM,CAAC,SAAS,QAAQ,IAAI,KAAK,IAAI,EAAE,KAAK,EAAE;AAC5E,SAAO,WAAW,kBAAkB,IAAI;AAC1C;AAEO,SAAS,uBAA+B;AAC7C,SAAO,YAAY,cAAc;AACnC;AAEO,SAAS,uBAA+B;AAC7C,SAAO,YAAY,mBAAmB;AACxC;;;ACnCO,IAAM,oBAAoB;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIA,SAAS,gBAAgB,KAAiC;AACxD,QAAM,UAAU,IAAI,QAAQ,MAAM,GAAG;AACrC,MAAI;AACF,WAAO,IAAI,KAAK,OAAO,OAAO;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAKO,SAAS,qBAAqB,KAAqB;AACxD,QAAM,SAAS,gBAAgB,GAAG;AAClC,MAAI,CAAC,QAAQ;AACX,WAAO,IAAI,YAAY;AAAA,EACzB;AACA,SAAO,OAAO,SAAS,EAAE,YAAY;AACvC;AAEA,SAAS,iBAAiB,UAAuB,SAA4B;AAC3E,SAAO,QAAQ,KAAK,CAAC,WAAW;AAC9B,UAAM,eAAe,gBAAgB,MAAM;AAC3C,WAAO,cAAc,aAAa,SAAS;AAAA,EAC7C,CAAC;AACH;AAMO,SAAS,gBAAgB,KAA2B;AACzD,QAAM,SAAS,gBAAgB,GAAG,KAAK,IAAI,KAAK,OAAO,KAAK;AAC5D,QAAM,YAAY,OAAO,SAAS;AAElC,QAAM,OAAqB;AAAA,IACzB,QAAQ,UAAU;AAAA,IAClB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,aAAa;AAAA,IACb,UAAU;AAAA,IACV,iBAAiB;AAAA,IACjB,iBAAiB;AAAA,IACjB,uBAAuB;AAAA,EACzB;AAEA,MAAI,iBAAiB,QAAQ,CAAC,MAAM,IAAI,CAAC,GAAG;AAC1C,SAAK,YAAY;AAAA,EACnB,WAAW,iBAAiB,QAAQ,CAAC,MAAM,KAAK,CAAC,GAAG;AAClD,SAAK,YAAY;AAAA,EACnB,WAAW,KAAK,UAAU,kBAAkB,SAAS,KAAK,MAAM,GAAG;AACjE,SAAK,YAAY;AAAA,EACnB;AAEA,MAAI,KAAK,UAAU,CAAC,QAAQ,QAAQ,MAAM,EAAE,SAAS,KAAK,MAAM,GAAG;AACjE,SAAK,aAAa;AAAA,EACpB;AAEA,MAAI,KAAK,UAAU,CAAC,QAAQ,MAAM,EAAE,SAAS,KAAK,MAAM,GAAG;AACzD,SAAK,cAAc;AAAA,EACrB;AAEA,MAAI,iBAAiB,QAAQ,CAAC,MAAM,MAAM,IAAI,CAAC,GAAG;AAChD,SAAK,WAAW;AAChB,SAAK,kBAAkB;AAAA,EACzB,WAAW,iBAAiB,QAAQ,CAAC,IAAI,CAAC,GAAG;AAC3C,SAAK,kBAAkB;AAAA,EACzB;AAEA,MAAI,iBAAiB,QAAQ,CAAC,IAAI,CAAC,GAAG;AACpC,SAAK,kBAAkB;AAAA,EACzB,WAAW,iBAAiB,QAAQ,CAAC,IAAI,CAAC,GAAG;AAC3C,SAAK,kBAAkB;AAAA,EACzB;AAEA,MAAI,OAAO,aAAa,QAAQ,UAAU,WAAW,QAAQ;AAC3D,SAAK,wBAAwB;AAAA,EAC/B;AAEA,SAAO;AACT;;;AC1GA,IAAM,oBAA4C;AAAA,EAChD,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AAAA,EACH,QAAG;AACL;AAEA,SAAS,aAAa,QAAgB,QAAwB;AAC5D,MAAI,OAAO,YAAY,MAAM,QAAQ;AACnC,WAAO,OAAO,YAAY;AAAA,EAC5B;AACA,MAAI,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG,YAAY,GAAG;AAC1C,WAAO,OAAO,OAAO,CAAC,EAAE,YAAY,IAAI,OAAO,MAAM,CAAC;AAAA,EACxD;AACA,SAAO;AACT;AAEO,SAAS,cAAc,iBAAyB,MAAsB;AAC3E,MAAI,oBAAoB,aAAa,oBAAoB,WAAW;AAClE,WAAO;AAAA,EACT;AAEA,SAAO,MAAM,KAAK,IAAI,EACnB,IAAI,CAAC,SAAS;AACb,UAAM,QAAQ,KAAK,YAAY;AAC/B,UAAM,SAAS,kBAAkB,KAAK;AACtC,QAAI,CAAC,QAAQ;AACX,aAAO;AAAA,IACT;AACA,WAAO,aAAa,MAAM,MAAM;AAAA,EAClC,CAAC,EACA,KAAK,EAAE;AACZ;;;AC5DA,IAAM,UAAU,WAAC,mBAAc,IAAE;AAEjC,SAAS,YAAY,MAAsB;AACzC,SAAO,KAAK,QAAQ,SAAS,EAAE;AACjC;AAEA,SAAS,kBAAkB,MAAsB;AAC/C,QAAM,aAAa,KAAK,UAAU,KAAK,EAAE,QAAQ,MAAM,GAAG,EAAE,QAAQ,MAAM,QAAG;AAC7E,SAAO,WAAW,kBAAkB,IAAI;AAC1C;AAEA,SAAS,iBAAiB,MAAsB;AAC9C,SAAO,KAAK,QAAQ,WAAW,QAAQ,EAAE,QAAQ,WAAW,QAAQ;AACtE;AAEA,SAAS,iBAAiB,MAAsB;AAC9C,SAAO,KAAK,QAAQ,WAAW,QAAQ,EAAE,QAAQ,WAAW,QAAQ;AACtE;AAMO,SAAS,eAAe,MAAc,aAA6B;AACxE,QAAM,OAAO,gBAAgB,WAAW;AACxC,MAAI,aAAa,KAAK,UAAU,KAAK,UAAU;AAE/C,MAAI,KAAK,iBAAiB;AACxB,iBAAa,cAAc,KAAK,iBAAiB,UAAU;AAAA,EAC7D;AAEA,MAAI,KAAK,aAAa;AACpB,iBAAa,YAAY,UAAU;AAAA,EACrC;AAEA,eAAa,KAAK,WAAW,kBAAkB,UAAU,IAAI,WAAW,kBAAkB;AAE1F,MAAI,KAAK,oBAAoB,UAAU;AACrC,iBAAa,iBAAiB,UAAU;AAAA,EAC1C,WAAW,KAAK,oBAAoB,YAAY;AAC9C,iBAAa,iBAAiB,UAAU;AAAA,EAC1C;AAEA,SAAO;AACT;;;AC3CO,SAAS,aAAa,MAAsB;AACjD,SAAO,KAAK,QAAQ,yBAAyB,GAAG,EAAE,QAAQ,yBAAyB,GAAG;AACxF;;;ALNA,IAAAC,eAAA;AAOA,IAAM,WAAW;AAEjB,IAAM,iBAAiB,oBAAI,IAA4B;AACvD,IAAM,iBAAiB,OAAO,cAAc,kBACxC,kCAAc,GAAG,SAAS,GAAG,QAC7B,kCAAcA,aAAY,GAAG;AAEjC,IAAI,YAA+C;AACnD,IAAI,kBAAkB;AACtB,IAAI,kBAAkB;AAEtB,SAAS,cAAc,kBAA8D;AACnF,MAAI,QAAQ;AACZ,MAAI,CAAC,OAAO;AACV,QAAI;AAGF,cAAQ,eAAe,WAAW;AAClC,kBAAY;AAAA,IACd,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,MAAI,oBAAoB,CAAC,iBAAiB;AAExC,UAAM,KAAK,EAAE,MAAM,qBAAqB,EAAE,CAAC;AAC3C,sBAAkB;AAAA,EACpB,WAAW,CAAC,oBAAoB,CAAC,iBAAiB;AAGhD,UAAM,KAAK,EAAE,UAAU,qBAAqB,EAAE,CAAC;AAC/C,sBAAkB;AAAA,EACpB;AAEA,SAAO;AACT;AAEA,SAAS,aAAa,MAA+B;AACnD,QAAM,MAAM,QAAQ;AACpB,QAAM,SAAS,eAAe,IAAI,GAAG;AACrC,MAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAEA,QAAM,YAAY,IAAI,KAAK,UAAU,MAAM,EAAE,aAAa,OAAO,CAAC;AAClE,iBAAe,IAAI,KAAK,SAAS;AACjC,SAAO;AACT;AAEA,SAAS,gBAAgB,MAAc,MAAe,qBAAqB,OAAiB;AAC1F,QAAM,YAAY,aAAa,IAAI;AACnC,QAAM,SAAmB,CAAC;AAE1B,aAAW,QAAQ,UAAU,QAAQ,IAAI,GAAG;AAC1C,UAAM,UAAU,KAAK,QAAQ,KAAK;AAClC,QAAI,CAAC,SAAS;AACZ;AAAA,IACF;AACA,QAAI,CAAC,sBAAsB,KAAK,eAAe,OAAO;AACpD;AAAA,IACF;AACA,WAAO,KAAK,QAAQ,kBAAkB,CAAC;AAAA,EACzC;AAEA,SAAO;AACT;AAEA,SAAS,0BACP,MACA,oBACA,kBACU;AACV,QAAM,QAAQ,cAAc,gBAAgB;AAC5C,MAAI,CAAC,OAAO;AAEV,WAAO,gBAAgB,MAAM,MAAM,kBAAkB;AAAA,EACvD;AAEA,QAAM,SAAmB,mBAAmB,MAAM,IAAI,MAAM,KAAK,IAAI,MAAM,IAAI,MAAM,KAAK;AAC1F,QAAM,UAAU,OAAO,IAAI,CAAC,UAAU,MAAM,kBAAkB,IAAI,CAAC;AACnE,MAAI,oBAAoB;AACtB,WAAO;AAAA,EACT;AACA,SAAO,QAAQ,OAAO,CAAC,UAAU,CAAC,SAAS,KAAK,KAAK,CAAC;AACxD;AAEA,SAAS,eACP,MACA,WACA,oBACA,kBACA,MACU;AACV,MAAI,cAAc,SAAS;AACzB,WAAO,0BAA0B,MAAM,oBAAoB,gBAAgB;AAAA,EAC7E;AAEA,MAAI,cAAc,SAAS;AAGzB,WAAO,gBAAgB,MAAM,MAAM,kBAAkB;AAAA,EACvD;AAEA,SAAO,gBAAgB,MAAM,MAAM,kBAAkB;AACvD;AAMO,SAAS,eAAe,MAAc,qBAAqB,OAAiB;AACjF,QAAM,aAAa,KAAK,UAAU,KAAK;AACvC,SAAO,gBAAgB,YAAY,QAAW,kBAAkB;AAClE;AAOO,SAAS,SACd,MACA,MACA,qBAAqB,OACrB,mBAAmB,OACT;AACV,QAAM,OAAO,gBAAgB,IAAI;AACjC,QAAM,aAAa,eAAe,MAAM,IAAI;AAC5C,SAAO,eAAe,YAAY,KAAK,WAAW,oBAAoB,kBAAkB,IAAI;AAC9F;AAOO,SAAS,cACd,MACA,MACA,qBAAqB,OACrB,mBAAmB,OACT;AACV,QAAM,OAAO,gBAAgB,IAAI;AACjC,MAAI,SAAS,SAAS,MAAM,MAAM,oBAAoB,gBAAgB;AAEtE,MAAI,KAAK,0BAA0B,WAAW;AAC5C,aAAS,OAAO,IAAI,CAAC,UAAU,gBAAgB,KAAK,CAAC;AAAA,EACvD;AAEA,SAAO,OAAO,IAAI,CAAC,UAAU,aAAa,KAAK,CAAC;AAClD;;;AM9JO,IAAM,aAAa;AAMnB,IAAM,wBAAwB;AAK9B,IAAM,mBAAmB;AAQzB,IAAM,2BAA2B;;;ACnBjC,SAAS,SAAS,IAAoB;AAC3C,MAAI,KAAK,GAAG;AACV,UAAM,IAAI,MAAM,0EAAc;AAAA,EAChC;AACA,SAAO,OAAO,KAAK;AACrB;AAEO,SAAS,SAAS,IAAoB;AAC3C,UAAQ,KAAK,OAAO;AACtB;AAEO,SAAS,WAAW,MAAsB;AAC/C,SAAO,MAAM,OAAO;AACtB;AAEO,SAAS,WAAW,MAAsB;AAC/C,SAAO,KAAK,MAAM,IAAI,IAAI;AAC5B;;;AXCA,IAAM,qBAAqB,oBAAI,IAA8B;AAE7D,IAAM,qBAAqB,oBAAI,IAAiC;AAEhE,IAAM,qBAAqB,IAAI,0BAAyB,EAAE,KAAK,WAAW,CAAC;AAE3E,SAAS,kBAAkB,UAA0B;AACnD,MAAI,aAAa,YAAY;AAC3B,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAEA,SAAS,sBAA8D;AACrE,QAAM,UAAU,WAAW;AAC3B,QAAM,QAAQ,gBAAAC,QAAG,YAAY,OAAO;AACpC,QAAM,QAAgD,CAAC;AAEvD,QACG,OAAO,CAAC,SAAS,KAAK,SAAS,aAAa,KAAK,CAAC,KAAK,WAAW,GAAG,CAAC,EACtE,QAAQ,CAAC,SAAS;AACjB,UAAM,CAAC,UAAU,IAAI,IAAI,KAAK,QAAQ,eAAe,EAAE,EAAE,MAAM,GAAG;AAClE,QAAI,CAAC,MAAM,QAAQ,GAAG;AACpB,YAAM,QAAQ,IAAI,CAAC;AAAA,IACrB;AACA,UAAM,QAAQ,EAAE,IAAI,IAAI,kBAAAC,QAAK,KAAK,SAAS,IAAI;AAAA,EACjD,CAAC;AAEH,SAAO;AACT;AAEA,IAAM,iBAAiB,oBAAoB;AAMpC,SAAS,mBAAmB,WAAW,kBAA0C;AACtF,QAAM,aAAa,kBAAkB,QAAQ;AAC7C,MAAI,eAAe,QAAQ;AACzB,UAAM,QAAQ,mBAAmB,OAAO;AACxC,UAAM,QAAQ,mBAAmB,OAAO;AACxC,WAAO,EAAE,GAAG,OAAO,GAAG,MAAM;AAAA,EAC9B;AAEA,SAAO,EAAE,GAAI,eAAe,UAAU,KAAK,CAAC,EAAG;AACjD;AAEA,SAAS,gBAAgB,WAAmB,WAAoC;AAC9E,QAAM,aAAa,qBAAqB,SAAS;AACjD,MAAI,UAAU,SAAS,UAAU,GAAG;AAClC,WAAO;AAAA,EACT;AAEA,QAAM,OAAO,WAAW,MAAM,MAAM,EAAE,CAAC;AACvC,MAAI,UAAU,SAAS,IAAI,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,QAAM,YAAY,UAAU,KAAK,CAAC,SAAS,KAAK,WAAW,IAAI,CAAC;AAChE,SAAO,aAAa;AACtB;AAEO,SAAS,iBAAiB,MAAc,WAAW,kBAAoC;AAC5F,QAAM,iBAAiB,kBAAkB,QAAQ;AACjD,QAAM,YAAY,mBAAmB,cAAc;AACnD,QAAM,YAAY,OAAO,KAAK,SAAS;AACvC,QAAM,OAAO,gBAAgB,MAAM,SAAS;AAE5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,wCAAU,IAAI,uBAAQ,cAAc,EAAE;AAAA,EACxD;AAEA,QAAM,WAAW,GAAG,IAAI,IAAI,cAAc;AAC1C,QAAM,SAAS,mBAAmB,IAAI,QAAQ;AAC9C,MAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAEA,QAAM,WAAW,UAAU,IAAI;AAC/B,QAAM,UAAU,WAAW,QAAQ;AACnC,qBAAmB,IAAI,UAAU,OAAO;AACxC,SAAO;AACT;AAMO,SAAS,iBAAiB,MAAc,WAAW,kBAAuC;AAC/F,QAAM,iBAAiB,kBAAkB,QAAQ;AACjD,QAAM,WAAW,GAAG,IAAI,IAAI,cAAc;AAC1C,QAAM,SAAS,mBAAmB,IAAI,QAAQ;AAC9C,MAAI,QAAQ;AACV,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,oBAAI,IAAoB;AACtC,QAAM,OAAO,iBAAiB,MAAM,cAAc;AAClD,OAAK,QAAQ,CAAC,QAAQ,UAAU;AAC9B,UAAM,OAAO,SAAS,CAAC,KAAK;AAC5B,WAAO,QAAQ,CAAC,SAAS,MAAM,IAAI,MAAM,IAAI,CAAC;AAAA,EAChD,CAAC;AAED,qBAAmB,IAAI,UAAU,KAAK;AACtC,SAAO;AACT;AAEO,UAAU,aAAa,MAAc,WAAW,kBAAqC;AAC1F,QAAM,OAAO,iBAAiB,MAAM,QAAQ;AAC5C,aAAW,UAAU,MAAM;AACzB,eAAW,QAAQ,QAAQ;AACzB,YAAM;AAAA,IACR;AAAA,EACF;AACF;AAEA,SAAS,yBAAyB,OAAe,SAAyB;AAExE,QAAM,YAAY,KAAK,IAAI,OAAO,OAAO;AACzC,MAAI,cAAc,GAAG;AACnB,WAAO;AAAA,EACT;AACA,QAAM,gBAAgB,KAAK,MAAM,CAAC,KAAK,MAAM,SAAS,CAAC;AACvD,QAAM,SAAS,KAAK,IAAI,GAAG,gBAAgB,CAAC;AAC5C,QAAM,SAAS,MAAM;AACrB,SAAO,KAAK,MAAM,YAAY,MAAM,IAAI;AAC1C;AAWA,SAAS,6BACP,MACA,OACe;AAEf,MAAI,KAAK,UAAU,GAAG;AACpB,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,CAAC,GAAG,IAAI;AACtB,MAAI,gBAAgB;AAEpB,aAAW,QAAQ,OAAO;AACxB,UAAM,OAAO,MAAM,IAAI,IAAI;AAC3B,QAAI,SAAS,QAAW;AAEtB,aAAO;AAAA,IACT;AACA,qBAAiB,IAAI;AAAA,EACvB;AAIA,QAAM,WAAW,IAAI;AACrB,QAAM,UAAU,6BAA6B,MAAM,SAAS;AAC5D,SAAO,WAAW;AACpB;AAQO,SAAS,cACd,MACA,MACA,WAAW,kBACX,UAAU,GACF;AACR,QAAM,WAAW,GAAG,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI,OAAO;AACvD,QAAM,SAAS,mBAAmB,IAAI,QAAQ;AAC9C,MAAI,WAAW,QAAW;AACxB,WAAO;AAAA,EACT;AAEA,QAAM,OAAO,gBAAgB,IAAI;AACjC,QAAM,SAAS,cAAc,MAAM,IAAI;AACvC,MAAI,OAAO,WAAW,GAAG;AACvB,uBAAmB,IAAI,UAAU,OAAO;AACxC,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,iBAAiB,MAAM,QAAQ;AAC7C,QAAM,qBAAqB,KAAK,cAAc;AAC9C,MAAI,gBAAgB;AAEpB,aAAW,SAAS,QAAQ;AAC1B,UAAM,UAAU,aAAa,KAAK;AAClC,QAAI,OAAO,MAAM,IAAI,OAAO;AAG5B,QAAI,SAAS,QAAW;AAEtB,UAAI,oBAAoB;AACtB,cAAM,YAAY,6BAA6B,SAAS,KAAK;AAC7D,YAAI,cAAc,MAAM;AACtB,iBAAO;AAAA,QACT;AAAA,MACF;AAGA,UAAI,SAAS,QAAW;AACtB,2BAAmB,IAAI,UAAU,OAAO;AACxC,eAAO;AAAA,MACT;AAAA,IACF;AAGA,UAAM,WAAW,YAAY,QAAQ,OAAO,OAAO,UAAU,KAAK;AAClE,qBAAiB,IAAI;AAAA,EACvB;AAEA,MAAI,SAAS,IAAI;AACjB,MAAI,sBAAsB,OAAO,SAAS,GAAG;AAC3C,cAAU,yBAAyB,EAAE,OAAO,SAAS;AAAA,EACvD;AAEA,WAAS,yBAAyB,QAAQ,OAAO;AACjD,qBAAmB,IAAI,UAAU,MAAM;AACvC,SAAO;AACT;AAEO,SAAS,cACd,MACA,MACA,WAAW,kBACX,UAAU,GACF;AACR,QAAM,UAAU,WAAW,OAAO;AAClC,QAAM,OAAO,cAAc,MAAM,MAAM,UAAU,OAAO;AACxD,SAAO,OAAO,WAAW,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC3C;AAEO,SAAS,SACd,MACA,GACA,WAAW,kBACX,YAAY,OACF;AACV,QAAM,UAAoB,CAAC;AAC3B,aAAW,QAAQ,aAAa,MAAM,QAAQ,GAAG;AAC/C,QAAI,CAAC,aAAa,QAAQ,KAAK;AAC7B,UAAI,CAAC,iBAAiB,IAAI,GAAG;AAC3B,gBAAQ,KAAK,IAAI;AACjB,YAAI,QAAQ,UAAU,GAAG;AACvB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,YACd,OAAO,MACP,WAAW,kBACX,SAAS,GACT,cAAc,IACd,YAAY,OACJ;AACR,QAAM,WAAW,KAAK;AACtB,QAAM,UAAU,SAAS,MAAM,UAAU,UAAU,SAAS;AAC5D,MAAI,QAAQ,SAAS,UAAU;AAC7B,UAAM,IAAI,MAAM,8CAAW,WAAW,0BAAW;AAAA,EACnD;AACA,QAAM,SAAmB,CAAC;AAC1B,WAAS,IAAI,GAAG,IAAI,QAAQ,KAAK,GAAG;AAClC,UAAM,MAAM,KAAK,MAAM,KAAK,OAAO,IAAI,QAAQ;AAC/C,WAAO,KAAK,QAAQ,GAAG,CAAC;AAAA,EAC1B;AACA,SAAO,OAAO,KAAK,GAAG;AACxB;AAEO,SAAS,iBACd,OAAO,MACP,WAAW,kBACX,SAAS,GACT,cAAc,IACN;AACR,SAAO,YAAY,MAAM,UAAU,QAAQ,aAAa,IAAI;AAC9D;","names":["import_node_fs","import_node_path","path","fs","import_node_fs","fs","import_node_fs","import_node_zlib","import_msgpackr","fs","import_meta","fs","path"]}