{"version":3,"sources":["../src/extract.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport JSON5 from 'json5';\nimport { Allow, parse } from 'partial-json';\n\n/**\n * Parses partially complete JSON string.\n */\nexport function parsePartialJson<T = unknown>(jsonString: string): T {\n  return JSON5.parse<T>(JSON.stringify(parse(jsonString, Allow.ALL)));\n}\n\n/**\n * Extracts JSON from string with lenient parsing rules to improve likelihood of successful extraction.\n */\nexport function extractJson<T = unknown>(\n  text: string,\n  throwOnBadJson?: true\n): T;\nexport function extractJson<T = unknown>(\n  text: string,\n  throwOnBadJson?: false\n): T | null;\nexport function extractJson<T = unknown>(\n  text: string,\n  throwOnBadJson?: boolean\n): T | null {\n  let openingChar: '{' | '[' | undefined;\n  let closingChar: '}' | ']' | undefined;\n  let startPos: number | undefined;\n  let nestingCount = 0;\n  let inString = false;\n  let escapeNext = false;\n\n  for (let i = 0; i < text.length; i++) {\n    const char = text[i].replace(/\\u00A0/g, ' ');\n\n    if (escapeNext) {\n      escapeNext = false;\n      continue;\n    }\n\n    if (char === '\\\\') {\n      escapeNext = true;\n      continue;\n    }\n\n    if (char === '\"') {\n      inString = !inString;\n      continue;\n    }\n\n    if (inString) {\n      continue;\n    }\n\n    if (!openingChar && (char === '{' || char === '[')) {\n      // Look for opening character\n      openingChar = char;\n      closingChar = char === '{' ? '}' : ']';\n      startPos = i;\n      nestingCount++;\n    } else if (char === openingChar) {\n      // Increment nesting for matching opening character\n      nestingCount++;\n    } else if (char === closingChar) {\n      // Decrement nesting for matching closing character\n      nestingCount--;\n      if (!nestingCount) {\n        // Reached end of target element\n        return JSON5.parse(text.substring(startPos || 0, i + 1)) as T;\n      }\n    }\n  }\n\n  if (startPos !== undefined && nestingCount > 0) {\n    // If an incomplete JSON structure is detected\n    try {\n      // Parse the incomplete JSON structure using partial-json for lenient parsing\n      return parsePartialJson<T>(text.substring(startPos));\n    } catch {\n      // If parsing fails, throw an error\n      if (throwOnBadJson) {\n        throw new Error(`Invalid JSON extracted from model output: ${text}`);\n      }\n      return null;\n    }\n  }\n  if (throwOnBadJson) {\n    throw new Error(`Invalid JSON extracted from model output: ${text}`);\n  }\n  return null;\n}\n\ninterface ExtractItemsResult {\n  items: unknown[];\n  cursor: number;\n}\n\n/**\n * Extracts complete objects from the first array found in the text.\n * Processes text from the cursor position and returns both complete items\n * and the new cursor position.\n */\nexport function extractItems(text: string, cursor = 0): ExtractItemsResult {\n  const items: unknown[] = [];\n  let currentCursor = cursor;\n\n  // Find the first array start if we haven't already processed any text\n  if (cursor === 0) {\n    const arrayStart = text.indexOf('[');\n    if (arrayStart === -1) {\n      return { items: [], cursor: text.length };\n    }\n    currentCursor = arrayStart + 1;\n  }\n\n  let objectStart = -1;\n  let braceCount = 0;\n  let inString = false;\n  let escapeNext = false;\n\n  // Process the text from the cursor position\n  for (let i = currentCursor; i < text.length; i++) {\n    const char = text[i];\n\n    if (escapeNext) {\n      escapeNext = false;\n      continue;\n    }\n\n    if (char === '\\\\') {\n      escapeNext = true;\n      continue;\n    }\n\n    if (char === '\"') {\n      inString = !inString;\n      continue;\n    }\n\n    if (inString) {\n      continue;\n    }\n\n    if (char === '{') {\n      if (braceCount === 0) {\n        objectStart = i;\n      }\n      braceCount++;\n    } else if (char === '}') {\n      braceCount--;\n      if (braceCount === 0 && objectStart !== -1) {\n        try {\n          const obj = JSON5.parse(text.substring(objectStart, i + 1));\n          items.push(obj);\n          currentCursor = i + 1;\n          objectStart = -1;\n        } catch {\n          // If parsing fails, continue\n        }\n      }\n    } else if (char === ']' && braceCount === 0) {\n      // End of array\n      break;\n    }\n  }\n\n  return {\n    items,\n    cursor: currentCursor,\n  };\n}\n"],"mappings":"AAgBA,OAAO,WAAW;AAClB,SAAS,OAAO,aAAa;AAKtB,SAAS,iBAA8B,YAAuB;AACnE,SAAO,MAAM,MAAS,KAAK,UAAU,MAAM,YAAY,MAAM,GAAG,CAAC,CAAC;AACpE;AAaO,SAAS,YACd,MACA,gBACU;AACV,MAAI;AACJ,MAAI;AACJ,MAAI;AACJ,MAAI,eAAe;AACnB,MAAI,WAAW;AACf,MAAI,aAAa;AAEjB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,OAAO,KAAK,CAAC,EAAE,QAAQ,WAAW,GAAG;AAE3C,QAAI,YAAY;AACd,mBAAa;AACb;AAAA,IACF;AAEA,QAAI,SAAS,MAAM;AACjB,mBAAa;AACb;AAAA,IACF;AAEA,QAAI,SAAS,KAAK;AAChB,iBAAW,CAAC;AACZ;AAAA,IACF;AAEA,QAAI,UAAU;AACZ;AAAA,IACF;AAEA,QAAI,CAAC,gBAAgB,SAAS,OAAO,SAAS,MAAM;AAElD,oBAAc;AACd,oBAAc,SAAS,MAAM,MAAM;AACnC,iBAAW;AACX;AAAA,IACF,WAAW,SAAS,aAAa;AAE/B;AAAA,IACF,WAAW,SAAS,aAAa;AAE/B;AACA,UAAI,CAAC,cAAc;AAEjB,eAAO,MAAM,MAAM,KAAK,UAAU,YAAY,GAAG,IAAI,CAAC,CAAC;AAAA,MACzD;AAAA,IACF;AAAA,EACF;AAEA,MAAI,aAAa,UAAa,eAAe,GAAG;AAE9C,QAAI;AAEF,aAAO,iBAAoB,KAAK,UAAU,QAAQ,CAAC;AAAA,IACrD,QAAQ;AAEN,UAAI,gBAAgB;AAClB,cAAM,IAAI,MAAM,6CAA6C,IAAI,EAAE;AAAA,MACrE;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACA,MAAI,gBAAgB;AAClB,UAAM,IAAI,MAAM,6CAA6C,IAAI,EAAE;AAAA,EACrE;AACA,SAAO;AACT;AAYO,SAAS,aAAa,MAAc,SAAS,GAAuB;AACzE,QAAM,QAAmB,CAAC;AAC1B,MAAI,gBAAgB;AAGpB,MAAI,WAAW,GAAG;AAChB,UAAM,aAAa,KAAK,QAAQ,GAAG;AACnC,QAAI,eAAe,IAAI;AACrB,aAAO,EAAE,OAAO,CAAC,GAAG,QAAQ,KAAK,OAAO;AAAA,IAC1C;AACA,oBAAgB,aAAa;AAAA,EAC/B;AAEA,MAAI,cAAc;AAClB,MAAI,aAAa;AACjB,MAAI,WAAW;AACf,MAAI,aAAa;AAGjB,WAAS,IAAI,eAAe,IAAI,KAAK,QAAQ,KAAK;AAChD,UAAM,OAAO,KAAK,CAAC;AAEnB,QAAI,YAAY;AACd,mBAAa;AACb;AAAA,IACF;AAEA,QAAI,SAAS,MAAM;AACjB,mBAAa;AACb;AAAA,IACF;AAEA,QAAI,SAAS,KAAK;AAChB,iBAAW,CAAC;AACZ;AAAA,IACF;AAEA,QAAI,UAAU;AACZ;AAAA,IACF;AAEA,QAAI,SAAS,KAAK;AAChB,UAAI,eAAe,GAAG;AACpB,sBAAc;AAAA,MAChB;AACA;AAAA,IACF,WAAW,SAAS,KAAK;AACvB;AACA,UAAI,eAAe,KAAK,gBAAgB,IAAI;AAC1C,YAAI;AACF,gBAAM,MAAM,MAAM,MAAM,KAAK,UAAU,aAAa,IAAI,CAAC,CAAC;AAC1D,gBAAM,KAAK,GAAG;AACd,0BAAgB,IAAI;AACpB,wBAAc;AAAA,QAChB,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF,WAAW,SAAS,OAAO,eAAe,GAAG;AAE3C;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,EACV;AACF;","names":[]}