{"version":3,"file":"language-detection.cjs","sources":["../../../src/utils/language-detection.ts"],"sourcesContent":["/**\n * Language auto-detection utility\n * Uses character-based heuristics to detect languages in text\n */\n\nexport interface LanguageDetectionResult {\n  /** Detected languages */\n  languages: string[];\n  /** Confidence scores for each language (0-1) */\n  confidence: Record<string, number>;\n  /** Primary language (highest confidence) */\n  primary: string;\n}\n\n/**\n * Detect languages from text using character-based heuristics\n * Detects multiple languages if present in the same text\n * \n * @param text - Text to analyze\n * @returns Array of detected language codes\n * \n * @example\n * detectLanguages('Müller café hello')\n * // → ['english', 'german', 'french']\n */\nexport function detectLanguages(text: string): string[] {\n  if (!text || text.trim().length === 0) {\n    return ['english']; // Default fallback\n  }\n\n  const detected = new Set<string>();\n\n  // Always include English as base language\n  detected.add('english');\n\n  // German indicators: ä, ö, ü, ß\n  if (/[äöüßÄÖÜ]/.test(text)) {\n    detected.add('german');\n  }\n\n  // French indicators: é, è, ê, à, ç, œ, etc.\n  if (/[àâäæçéèêëïîôùûüÿœÀÂÄÆÇÉÈÊËÏÎÔÙÛÜŸŒ]/.test(text)) {\n    detected.add('french');\n  }\n\n  // Spanish indicators: ñ, á, é, í, ó, ú, ¿, ¡\n  if (/[áéíóúñüÁÉÍÓÚÑÜ¿¡]/.test(text)) {\n    detected.add('spanish');\n  }\n\n  return Array.from(detected);\n}\n\n/**\n * Detect languages with confidence scores\n * Provides more detailed information about language detection\n * \n * @param text - Text to analyze\n * @returns Detection result with confidence scores\n */\nexport function detectLanguagesWithConfidence(text: string): LanguageDetectionResult {\n  if (!text || text.trim().length === 0) {\n    return {\n      languages: ['english'],\n      confidence: { english: 1.0 },\n      primary: 'english',\n    };\n  }\n\n  const confidence: Record<string, number> = {\n    english: 0.5, // Base confidence for English\n  };\n\n  const textLength = text.length;\n\n  // Count German characters\n  const germanChars = (text.match(/[äöüßÄÖÜ]/g) || []).length;\n  if (germanChars > 0) {\n    confidence.german = Math.min(1.0, 0.5 + (germanChars / textLength) * 10);\n  }\n\n  // Count French characters\n  const frenchChars = (text.match(/[àâäæçéèêëïîôùûüÿœÀÂÄÆÇÉÈÊËÏÎÔÙÛÜŸŒ]/g) || []).length;\n  if (frenchChars > 0) {\n    confidence.french = Math.min(1.0, 0.5 + (frenchChars / textLength) * 10);\n  }\n\n  // Count Spanish characters\n  const spanishChars = (text.match(/[áéíóúñüÁÉÍÓÚÑÜ¿¡]/g) || []).length;\n  if (spanishChars > 0) {\n    confidence.spanish = Math.min(1.0, 0.5 + (spanishChars / textLength) * 10);\n  }\n\n  // Determine languages (confidence > 0.5)\n  const languages = Object.entries(confidence)\n    .filter(([_, conf]) => conf >= 0.5)\n    .map(([lang]) => lang);\n\n  // Find primary language (highest confidence)\n  const primary = Object.entries(confidence)\n    .sort(([, a], [, b]) => b - a)[0][0];\n\n  return {\n    languages,\n    confidence,\n    primary,\n  };\n}\n\n/**\n * Sample text from a dataset for language detection\n * Takes first N items to avoid processing entire large datasets\n * \n * @param words - Array of words or objects\n * @param sampleSize - Number of items to sample (default: 100)\n * @returns Combined sample text\n */\nexport function sampleTextForDetection(\n  //\n  words: (string | any)[], sampleSize: number = 100): string {\n  const sample = words.slice(0, Math.min(sampleSize, words.length));\n  \n  return sample\n    .map(item => {\n      if (typeof item === 'string') {\n        return item;\n      } else if (typeof item === 'object' && item !== null) {\n        // Extract text from object fields\n        return Object.values(item)\n          .filter(v => typeof v === 'string')\n          .join(' ');\n      }\n      return '';\n    })\n    .join(' ');\n}\n\n/**\n * Check if a language code is valid\n */\nexport function isValidLanguage(lang: string): boolean {\n  const validLanguages = ['english', 'german', 'french', 'spanish', 'auto'];\n  return validLanguages.includes(lang.toLowerCase());\n}\n\n/**\n * Normalize language codes\n * Handles common variations and aliases\n */\nexport function normalizeLanguageCode(lang: string): string {\n  const normalized = lang.toLowerCase().trim();\n  \n  // Handle aliases\n  const aliases: Record<string, string> = {\n    'en': 'english',\n    'de': 'german',\n    'fr': 'french',\n    'es': 'spanish',\n    'eng': 'english',\n    'deu': 'german',\n    'fra': 'french',\n    'esp': 'spanish',\n  };\n  \n  return aliases[normalized] || normalized;\n}\n"],"names":[],"mappings":";;AAyBO,SAAS,gBAAgB,MAAwB;AACtD,MAAI,CAAC,QAAQ,KAAK,KAAA,EAAO,WAAW,GAAG;AACrC,WAAO,CAAC,SAAS;AAAA,EACnB;AAEA,QAAM,+BAAe,IAAA;AAGrB,WAAS,IAAI,SAAS;AAGtB,MAAI,YAAY,KAAK,IAAI,GAAG;AAC1B,aAAS,IAAI,QAAQ;AAAA,EACvB;AAGA,MAAI,uCAAuC,KAAK,IAAI,GAAG;AACrD,aAAS,IAAI,QAAQ;AAAA,EACvB;AAGA,MAAI,qBAAqB,KAAK,IAAI,GAAG;AACnC,aAAS,IAAI,SAAS;AAAA,EACxB;AAEA,SAAO,MAAM,KAAK,QAAQ;AAC5B;AASO,SAAS,8BAA8B,MAAuC;AACnF,MAAI,CAAC,QAAQ,KAAK,KAAA,EAAO,WAAW,GAAG;AACrC,WAAO;AAAA,MACL,WAAW,CAAC,SAAS;AAAA,MACrB,YAAY,EAAE,SAAS,EAAA;AAAA,MACvB,SAAS;AAAA,IAAA;AAAA,EAEb;AAEA,QAAM,aAAqC;AAAA,IACzC,SAAS;AAAA;AAAA,EAAA;AAGX,QAAM,aAAa,KAAK;AAGxB,QAAM,eAAe,KAAK,MAAM,YAAY,KAAK,CAAA,GAAI;AACrD,MAAI,cAAc,GAAG;AACnB,eAAW,SAAS,KAAK,IAAI,GAAK,MAAO,cAAc,aAAc,EAAE;AAAA,EACzE;AAGA,QAAM,eAAe,KAAK,MAAM,uCAAuC,KAAK,CAAA,GAAI;AAChF,MAAI,cAAc,GAAG;AACnB,eAAW,SAAS,KAAK,IAAI,GAAK,MAAO,cAAc,aAAc,EAAE;AAAA,EACzE;AAGA,QAAM,gBAAgB,KAAK,MAAM,qBAAqB,KAAK,CAAA,GAAI;AAC/D,MAAI,eAAe,GAAG;AACpB,eAAW,UAAU,KAAK,IAAI,GAAK,MAAO,eAAe,aAAc,EAAE;AAAA,EAC3E;AAGA,QAAM,YAAY,OAAO,QAAQ,UAAU,EACxC,OAAO,CAAC,CAAC,GAAG,IAAI,MAAM,QAAQ,GAAG,EACjC,IAAI,CAAC,CAAC,IAAI,MAAM,IAAI;AAGvB,QAAM,UAAU,OAAO,QAAQ,UAAU,EACtC,KAAK,CAAC,CAAA,EAAG,CAAC,GAAG,CAAA,EAAG,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;AAErC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EAAA;AAEJ;AAUO,SAAS,uBAEd,OAAyB,aAAqB,KAAa;AAC3D,QAAM,SAAS,MAAM,MAAM,GAAG,KAAK,IAAI,YAAY,MAAM,MAAM,CAAC;AAEhE,SAAO,OACJ,IAAI,CAAA,SAAQ;AACX,QAAI,OAAO,SAAS,UAAU;AAC5B,aAAO;AAAA,IACT,WAAW,OAAO,SAAS,YAAY,SAAS,MAAM;AAEpD,aAAO,OAAO,OAAO,IAAI,EACtB,OAAO,CAAA,MAAK,OAAO,MAAM,QAAQ,EACjC,KAAK,GAAG;AAAA,IACb;AACA,WAAO;AAAA,EACT,CAAC,EACA,KAAK,GAAG;AACb;AAKO,SAAS,gBAAgB,MAAuB;AACrD,QAAM,iBAAiB,CAAC,WAAW,UAAU,UAAU,WAAW,MAAM;AACxE,SAAO,eAAe,SAAS,KAAK,YAAA,CAAa;AACnD;AAMO,SAAS,sBAAsB,MAAsB;AAC1D,QAAM,aAAa,KAAK,YAAA,EAAc,KAAA;AAGtC,QAAM,UAAkC;AAAA,IACtC,MAAM;AAAA,IACN,MAAM;AAAA,IACN,MAAM;AAAA,IACN,MAAM;AAAA,IACN,OAAO;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,EAAA;AAGT,SAAO,QAAQ,UAAU,KAAK;AAChC;;;;;;"}