{"version":3,"file":"phrase-matching.cjs","sources":["../../../src/core/phrase-matching.ts"],"sourcesContent":["/**\n * Phrase matching algorithms for multi-word query support\n */\n\nimport { calculateLevenshteinDistance, calculateDamerauLevenshteinDistance } from \"../algorithms/levenshtein.js\";\nimport { tokenize } from \"../utils/tokenizer.js\";\n\nexport interface PhraseMatchOptions {\n  /** Require exact phrase match (no typos) */\n  exactMatch?: boolean;\n  /** Maximum edit distance per word in phrase */\n  maxEditDistance?: number;\n  /** Score multiplier for phrase matches */\n  proximityBonus?: number;\n  /** Maximum words between phrase words for proximity match */\n  maxProximityDistance?: number;\n  /** Use Damerau-Levenshtein (transpositions) */\n  useTranspositions?: boolean;\n}\n\nexport interface PhraseMatchResult {\n  /** Whether phrase was found */\n  matched: boolean;\n  /** Match score (0-1) */\n  score: number;\n  /** Type of match */\n  matchType: \"exact\" | \"fuzzy\" | \"proximity\" | \"none\";\n  /** Start position in text */\n  startPos?: number;\n  /** End position in text */\n  endPos?: number;\n  /** Words that matched */\n  matchedWords?: string[];\n}\n\nconst DEFAULT_OPTIONS: Required<PhraseMatchOptions> = {\n  exactMatch: false,\n  maxEditDistance: 1,\n  proximityBonus: 1.5,\n  maxProximityDistance: 3,\n  useTranspositions: false,\n};\n\n/**\n * Match a phrase in text with various strategies\n */\nexport function matchPhrase(\n  //\n  text: string,\n  phrase: string,\n  options: PhraseMatchOptions = {}\n): PhraseMatchResult {\n  const opts = { ...DEFAULT_OPTIONS, ...options };\n\n  if (!text || !phrase) {\n    return { matched: false, score: 0, matchType: \"none\" };\n  }\n\n  const normalizedText = text.toLowerCase();\n  const normalizedPhrase = phrase.toLowerCase();\n\n  // Strategy 1: Exact phrase match (highest score)\n  const exactMatch = findExactPhrase(normalizedText, normalizedPhrase);\n  if (exactMatch.matched) {\n    return { ...exactMatch, score: 1.0, matchType: \"exact\" };\n  }\n\n  // If exact match required, stop here\n  if (opts.exactMatch) {\n    return { matched: false, score: 0, matchType: \"none\" };\n  }\n\n  // Strategy 2: Fuzzy phrase match (allow typos)\n  const fuzzyMatch = findFuzzyPhrase(normalizedText, normalizedPhrase, opts.maxEditDistance, opts.useTranspositions);\n  if (fuzzyMatch.matched) {\n    return { ...fuzzyMatch, matchType: \"fuzzy\" };\n  }\n\n  // Strategy 3: Proximity match (words nearby)\n  const proximityMatch = findProximityMatch(normalizedText, normalizedPhrase, opts.maxProximityDistance);\n  if (proximityMatch.matched) {\n    return { ...proximityMatch, matchType: \"proximity\" };\n  }\n\n  return { matched: false, score: 0, matchType: \"none\" };\n}\n\n/**\n * Find exact phrase in text\n */\nfunction findExactPhrase(\n  //\n  text: string,\n  phrase: string\n): PhraseMatchResult {\n  const index = text.indexOf(phrase);\n\n  if (index !== -1) {\n    return {\n      matched: true,\n      score: 1.0,\n      matchType: \"exact\",\n      startPos: index,\n      endPos: index + phrase.length,\n    };\n  }\n\n  return { matched: false, score: 0, matchType: \"none\" };\n}\n\n/**\n * Find phrase with fuzzy matching (allow typos)\n */\nfunction findFuzzyPhrase(\n  //\n  text: string,\n  phrase: string,\n  maxEditDistance: number,\n  useTranspositions: boolean\n): PhraseMatchResult {\n  // Use centralized tokenizer for consistent word boundary handling\n  const phraseWords = tokenize(phrase, { lowercase: true });\n  const textWords = tokenize(text, { lowercase: true });\n\n  // Try to find consecutive words that match the phrase\n  for (let i = 0; i <= textWords.length - phraseWords.length; i++) {\n    const segment = textWords.slice(i, i + phraseWords.length);\n\n    // Check if this segment matches the phrase with fuzzy matching\n    let totalDistance = 0;\n    let allMatch = true;\n\n    for (let j = 0; j < phraseWords.length; j++) {\n      const distance = useTranspositions ? calculateDamerauLevenshteinDistance(phraseWords[j], segment[j], maxEditDistance) : calculateLevenshteinDistance(phraseWords[j], segment[j], maxEditDistance);\n\n      if (distance > maxEditDistance) {\n        allMatch = false;\n        break;\n      }\n      totalDistance += distance;\n    }\n\n    if (allMatch) {\n      // Calculate score based on edit distance\n      const maxPossibleDistance = phraseWords.length * maxEditDistance;\n      const score = maxPossibleDistance > 0 ? 0.7 + 0.2 * (1 - totalDistance / maxPossibleDistance) : 0.9;\n\n      return {\n        matched: true,\n        score,\n        matchType: \"fuzzy\",\n        matchedWords: segment,\n      };\n    }\n  }\n\n  return { matched: false, score: 0, matchType: \"none\" };\n}\n\n/**\n * Find words in proximity (nearby but not necessarily consecutive)\n */\nfunction findProximityMatch(\n  //\n  text: string,\n  phrase: string,\n  maxDistance: number\n): PhraseMatchResult {\n  // Use centralized tokenizer for consistent word boundary handling\n  const phraseWords = tokenize(phrase, { lowercase: true });\n  const textWords = tokenize(text, { lowercase: true });\n\n  // Find positions of each phrase word in text\n  const positions: number[][] = phraseWords.map(() => []);\n\n  textWords.forEach((word, index) => {\n    phraseWords.forEach((phraseWord, phraseIndex) => {\n      if (word === phraseWord || word.includes(phraseWord) || phraseWord.includes(word)) {\n        positions[phraseIndex].push(index);\n      }\n    });\n  });\n\n  // Check if all words were found\n  if (positions.some((p) => p.length === 0)) {\n    return { matched: false, score: 0, matchType: \"none\" };\n  }\n\n  // Find the best combination where words are close together\n  let bestDistance = Infinity;\n  let bestPositions: number[] = [];\n\n  function findBestCombination(wordIndex: number, currentPositions: number[]): void {\n    if (wordIndex === phraseWords.length) {\n      // Calculate total distance\n      const sorted = [...currentPositions].sort((a, b) => a - b);\n      const distance = sorted[sorted.length - 1] - sorted[0];\n\n      if (distance < bestDistance) {\n        bestDistance = distance;\n        bestPositions = [...currentPositions];\n      }\n      return;\n    }\n\n    for (const pos of positions[wordIndex]) {\n      findBestCombination(wordIndex + 1, [...currentPositions, pos]);\n    }\n  }\n\n  findBestCombination(0, []);\n\n  // Check if words are within max distance\n  if (bestDistance <= maxDistance) {\n    // Score based on proximity (closer = higher score)\n    const score = 0.5 + 0.2 * (1 - bestDistance / maxDistance);\n\n    return {\n      matched: true,\n      score,\n      matchType: \"proximity\",\n      matchedWords: bestPositions.map((i) => textWords[i]),\n    };\n  }\n\n  return { matched: false, score: 0, matchType: \"none\" };\n}\n\n/**\n * Calculate phrase match score for a text\n * Returns 0 if no match, or a boosted score if phrase matches\n */\nexport function calculatePhraseScore(\n  //\n  text: string,\n  phrase: string,\n  baseScore: number,\n  options: PhraseMatchOptions = {}\n): number {\n  const match = matchPhrase(text, phrase, options);\n\n  if (!match.matched) {\n    return 0;\n  }\n\n  // Apply proximity bonus\n  const bonus = options.proximityBonus || 1.5;\n  return Math.min(1.0, baseScore * match.score * bonus);\n}\n"],"names":["tokenize","calculateDamerauLevenshteinDistance","calculateLevenshteinDistance"],"mappings":";;;;AAmCA,MAAM,kBAAgD;AAAA,EACpD,YAAY;AAAA,EACZ,iBAAiB;AAAA,EACjB,gBAAgB;AAAA,EAChB,sBAAsB;AAAA,EACtB,mBAAmB;AACrB;AAKO,SAAS,YAEd,MACA,QACA,UAA8B,CAAA,GACX;AACnB,QAAM,OAAO,EAAE,GAAG,iBAAiB,GAAG,QAAA;AAEtC,MAAI,CAAC,QAAQ,CAAC,QAAQ;AACpB,WAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAAA,EAChD;AAEA,QAAM,iBAAiB,KAAK,YAAA;AAC5B,QAAM,mBAAmB,OAAO,YAAA;AAGhC,QAAM,aAAa,gBAAgB,gBAAgB,gBAAgB;AACnE,MAAI,WAAW,SAAS;AACtB,WAAO,EAAE,GAAG,YAAY,OAAO,GAAK,WAAW,QAAA;AAAA,EACjD;AAGA,MAAI,KAAK,YAAY;AACnB,WAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAAA,EAChD;AAGA,QAAM,aAAa,gBAAgB,gBAAgB,kBAAkB,KAAK,iBAAiB,KAAK,iBAAiB;AACjH,MAAI,WAAW,SAAS;AACtB,WAAO,EAAE,GAAG,YAAY,WAAW,QAAA;AAAA,EACrC;AAGA,QAAM,iBAAiB,mBAAmB,gBAAgB,kBAAkB,KAAK,oBAAoB;AACrG,MAAI,eAAe,SAAS;AAC1B,WAAO,EAAE,GAAG,gBAAgB,WAAW,YAAA;AAAA,EACzC;AAEA,SAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAChD;AAKA,SAAS,gBAEP,MACA,QACmB;AACnB,QAAM,QAAQ,KAAK,QAAQ,MAAM;AAEjC,MAAI,UAAU,IAAI;AAChB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,MACP,WAAW;AAAA,MACX,UAAU;AAAA,MACV,QAAQ,QAAQ,OAAO;AAAA,IAAA;AAAA,EAE3B;AAEA,SAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAChD;AAKA,SAAS,gBAEP,MACA,QACA,iBACA,mBACmB;AAEnB,QAAM,cAAcA,UAAAA,SAAS,QAAQ,EAAE,WAAW,MAAM;AACxD,QAAM,YAAYA,UAAAA,SAAS,MAAM,EAAE,WAAW,MAAM;AAGpD,WAAS,IAAI,GAAG,KAAK,UAAU,SAAS,YAAY,QAAQ,KAAK;AAC/D,UAAM,UAAU,UAAU,MAAM,GAAG,IAAI,YAAY,MAAM;AAGzD,QAAI,gBAAgB;AACpB,QAAI,WAAW;AAEf,aAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,YAAM,WAAW,oBAAoBC,YAAAA,oCAAoC,YAAY,CAAC,GAAG,QAAQ,CAAC,GAAG,eAAe,IAAIC,YAAAA,6BAA6B,YAAY,CAAC,GAAG,QAAQ,CAAC,GAAG,eAAe;AAEhM,UAAI,WAAW,iBAAiB;AAC9B,mBAAW;AACX;AAAA,MACF;AACA,uBAAiB;AAAA,IACnB;AAEA,QAAI,UAAU;AAEZ,YAAM,sBAAsB,YAAY,SAAS;AACjD,YAAM,QAAQ,sBAAsB,IAAI,MAAM,OAAO,IAAI,gBAAgB,uBAAuB;AAEhG,aAAO;AAAA,QACL,SAAS;AAAA,QACT;AAAA,QACA,WAAW;AAAA,QACX,cAAc;AAAA,MAAA;AAAA,IAElB;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAChD;AAKA,SAAS,mBAEP,MACA,QACA,aACmB;AAEnB,QAAM,cAAcF,UAAAA,SAAS,QAAQ,EAAE,WAAW,MAAM;AACxD,QAAM,YAAYA,UAAAA,SAAS,MAAM,EAAE,WAAW,MAAM;AAGpD,QAAM,YAAwB,YAAY,IAAI,MAAM,CAAA,CAAE;AAEtD,YAAU,QAAQ,CAAC,MAAM,UAAU;AACjC,gBAAY,QAAQ,CAAC,YAAY,gBAAgB;AAC/C,UAAI,SAAS,cAAc,KAAK,SAAS,UAAU,KAAK,WAAW,SAAS,IAAI,GAAG;AACjF,kBAAU,WAAW,EAAE,KAAK,KAAK;AAAA,MACnC;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AAGD,MAAI,UAAU,KAAK,CAAC,MAAM,EAAE,WAAW,CAAC,GAAG;AACzC,WAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAAA,EAChD;AAGA,MAAI,eAAe;AACnB,MAAI,gBAA0B,CAAA;AAE9B,WAAS,oBAAoB,WAAmB,kBAAkC;AAChF,QAAI,cAAc,YAAY,QAAQ;AAEpC,YAAM,SAAS,CAAC,GAAG,gBAAgB,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AACzD,YAAM,WAAW,OAAO,OAAO,SAAS,CAAC,IAAI,OAAO,CAAC;AAErD,UAAI,WAAW,cAAc;AAC3B,uBAAe;AACf,wBAAgB,CAAC,GAAG,gBAAgB;AAAA,MACtC;AACA;AAAA,IACF;AAEA,eAAW,OAAO,UAAU,SAAS,GAAG;AACtC,0BAAoB,YAAY,GAAG,CAAC,GAAG,kBAAkB,GAAG,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,sBAAoB,GAAG,EAAE;AAGzB,MAAI,gBAAgB,aAAa;AAE/B,UAAM,QAAQ,MAAM,OAAO,IAAI,eAAe;AAE9C,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,MACA,WAAW;AAAA,MACX,cAAc,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,CAAC;AAAA,IAAA;AAAA,EAEvD;AAEA,SAAO,EAAE,SAAS,OAAO,OAAO,GAAG,WAAW,OAAA;AAChD;;"}