{"version":3,"file":"ai-model/models/ui-tars/adapter.mjs","sources":["../../../../../src/ai-model/models/ui-tars/adapter.ts"],"sourcesContent":["import { type TModelFamily, UITarsModelVersion } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport type {\n  JsonParserContext,\n  JsonParserSource,\n  ModelAdapterDefinition,\n} from '../../model-adapter/types';\nimport {\n  extractJSONFromCodeBlock,\n  safeParseJson,\n} from '../../service-caller/json';\nimport {\n  type LocateResultValue,\n  unwrapCoordinateListLikeInput,\n} from '../../shared/model-locate-result';\nimport { createUiTarsPlanner } from './planning';\n\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nfunction normalizeJsonObject(\n  obj: any,\n  context: Pick<JsonParserContext, 'preserveStringValueKeys'> = {},\n): any {\n  if (obj === null || obj === undefined) {\n    return obj;\n  }\n\n  if (Array.isArray(obj)) {\n    return obj.map((item) => normalizeJsonObject(item, context));\n  }\n\n  if (typeof obj === 'object') {\n    const normalized: any = {};\n    for (const [key, value] of Object.entries(obj)) {\n      const trimmedKey = key.trim();\n      const preserveStringValue =\n        context.preserveStringValueKeys?.includes(trimmedKey) ?? false;\n      const normalizedValue =\n        typeof value === 'string'\n          ? preserveStringValue\n            ? value\n            : value.trim()\n          : normalizeJsonObject(value, context);\n      normalized[trimmedKey] = normalizedValue;\n    }\n    return normalized;\n  }\n\n  return typeof obj === 'string' ? obj.trim() : obj;\n}\n\nfunction shouldRepairUiTarsLocateJson(source: JsonParserSource) {\n  return (\n    source === 'locate' ||\n    source === 'section-locator' ||\n    source === 'planning-action-param'\n  );\n}\n\nfunction preprocessUiTarsLocateJson(input: string) {\n  if (input.includes('bbox')) {\n    while (/\\d+\\s+\\d+/.test(input)) {\n      input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n    }\n  }\n  return input;\n}\n\nconst uiTarsJsonParser: ModelAdapterDefinition['jsonParser'] = (\n  raw,\n  context = { source: 'generic-object' },\n) => {\n  const { source } = context;\n  try {\n    return safeParseJson(raw, context);\n  } catch (firstError) {\n    if (!shouldRepairUiTarsLocateJson(source)) {\n      throw firstError;\n    }\n\n    const jsonString = preprocessUiTarsLocateJson(\n      extractJSONFromCodeBlock(raw),\n    );\n    try {\n      return normalizeJsonObject(JSON.parse(jsonrepair(jsonString)), context);\n    } catch (error) {\n      throw Error(\n        `failed to parse LLM response into JSON. Error - ${String(\n          error ?? firstError ?? 'unknown error',\n        )}. Response - \\n ${raw}`,\n      );\n    }\n  }\n};\n\n// UI-TARS has not received active updates for a long time, so this parser is\n// intentionally kept separate from Doubao even though the current logic is the\n// same. This avoids coupling UI-TARS behavior to future Doubao adapter changes.\nfunction parseUiTarsRawLocateValue(input: unknown): LocateResultValue {\n  const bbox = unwrapCoordinateListLikeInput(input as any);\n  if (typeof bbox === 'string') {\n    assert(\n      /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n      `invalid bbox data string for ui-tars mode: ${bbox}`,\n    );\n    const splitted = bbox.split(' ');\n    if (splitted.length === 4) {\n      return {\n        type: 'bbox',\n        coordinates: [\n          Number(splitted[0]),\n          Number(splitted[1]),\n          Number(splitted[2]),\n          Number(splitted[3]),\n        ],\n      };\n    }\n    throw new Error(`invalid bbox data string for ui-tars mode: ${bbox}`);\n  }\n\n  let bboxList: number[] = [];\n  if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n    bbox.forEach((item) => {\n      if (typeof item === 'string' && item.includes(',')) {\n        const [x, y] = item.split(',');\n        bboxList.push(Number(x.trim()), Number(y.trim()));\n      } else if (typeof item === 'string' && item.includes(' ')) {\n        const [x, y] = item.split(' ');\n        bboxList.push(Number(x.trim()), Number(y.trim()));\n      } else {\n        bboxList.push(Number(item));\n      }\n    });\n  } else {\n    bboxList = bbox as number[];\n  }\n\n  if (bboxList.length === 4 || bboxList.length === 5) {\n    return {\n      type: 'bbox',\n      coordinates: [bboxList[0], bboxList[1], bboxList[2], bboxList[3]],\n    };\n  }\n\n  if (\n    bboxList.length === 6 ||\n    bboxList.length === 2 ||\n    bboxList.length === 3 ||\n    bboxList.length === 7\n  ) {\n    return { type: 'point', coordinates: [bboxList[0], bboxList[1]] };\n  }\n\n  if (bbox.length === 8) {\n    return {\n      type: 'bbox',\n      coordinates: [bboxList[0], bboxList[1], bboxList[4], bboxList[5]],\n    };\n  }\n\n  const msg = `invalid bbox data for ui-tars mode: ${JSON.stringify(bbox)} `;\n  throw new Error(msg);\n}\n\nfunction createUiTarsAdapter(\n  uiTarsModelVersion: UITarsModelVersion,\n): ModelAdapterDefinition {\n  return {\n    jsonParser: uiTarsJsonParser,\n    chatCompletion: {\n      unsupportedUserConfig: [\n        'reasoningEnabled',\n        'reasoningEffort',\n        'reasoningBudget',\n      ],\n      buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => {\n        const commonOverrideConfig: Record<string, unknown> = {};\n\n        if (userConfig.temperature !== undefined) {\n          commonOverrideConfig.temperature = userConfig.temperature;\n        }\n\n        return {\n          config: {\n            ...midsceneDefaults,\n            ...commonOverrideConfig,\n          },\n        };\n      },\n    },\n    planning: {\n      kind: 'custom',\n      cacheEnabled: false,\n      defaultReplanningCycleLimit: defaultVlmUiTarsReplanningCycleLimit,\n      planner: createUiTarsPlanner(uiTarsModelVersion),\n    },\n    locate: {\n      resultAdapter: {\n        coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },\n        parseRawLocateValue: parseUiTarsRawLocateValue,\n      },\n    },\n  };\n}\n\nconst uiTarsDoubao15Adapter = createUiTarsAdapter(\n  UITarsModelVersion.DOUBAO_1_5_20B,\n);\n\nexport const uiTarsAdapters = {\n  'vlm-ui-tars': createUiTarsAdapter(UITarsModelVersion.V1_0),\n  'vlm-ui-tars-doubao': uiTarsDoubao15Adapter,\n  'vlm-ui-tars-doubao-1.5': uiTarsDoubao15Adapter,\n} satisfies Pick<\n  Record<TModelFamily, ModelAdapterDefinition>,\n  'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5'\n>;\n"],"names":["defaultVlmUiTarsReplanningCycleLimit","normalizeJsonObject","obj","context","Array","item","normalized","key","value","Object","trimmedKey","preserveStringValue","normalizedValue","shouldRepairUiTarsLocateJson","source","preprocessUiTarsLocateJson","input","uiTarsJsonParser","raw","safeParseJson","firstError","jsonString","extractJSONFromCodeBlock","JSON","jsonrepair","error","Error","String","parseUiTarsRawLocateValue","bbox","unwrapCoordinateListLikeInput","assert","splitted","Number","bboxList","x","y","msg","createUiTarsAdapter","uiTarsModelVersion","midsceneDefaults","userConfig","commonOverrideConfig","undefined","createUiTarsPlanner","uiTarsDoubao15Adapter","UITarsModelVersion","uiTarsAdapters"],"mappings":";;;;;;AAkBA,MAAMA,uCAAuC;AAE7C,SAASC,oBACPC,GAAQ,EACRC,UAA8D,CAAC,CAAC;IAEhE,IAAID,QAAAA,KACF,OAAOA;IAGT,IAAIE,MAAM,OAAO,CAACF,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACG,OAASJ,oBAAoBI,MAAMF;IAGrD,IAAI,AAAe,YAAf,OAAOD,KAAkB;QAC3B,MAAMI,aAAkB,CAAC;QACzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACP,KAAM;YAC9C,MAAMQ,aAAaH,IAAI,IAAI;YAC3B,MAAMI,sBACJR,QAAQ,uBAAuB,EAAE,SAASO,eAAe;YAC3D,MAAME,kBACJ,AAAiB,YAAjB,OAAOJ,QACHG,sBACEH,QACAA,MAAM,IAAI,KACZP,oBAAoBO,OAAOL;YACjCG,UAAU,CAACI,WAAW,GAAGE;QAC3B;QACA,OAAON;IACT;IAEA,OAAO,AAAe,YAAf,OAAOJ,MAAmBA,IAAI,IAAI,KAAKA;AAChD;AAEA,SAASW,6BAA6BC,MAAwB;IAC5D,OACEA,AAAW,aAAXA,UACAA,AAAW,sBAAXA,UACAA,AAAW,4BAAXA;AAEJ;AAEA,SAASC,2BAA2BC,KAAa;IAC/C,IAAIA,MAAM,QAAQ,CAAC,SACjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEA,MAAMC,mBAAyD,CAC7DC,KACAf,UAAU;IAAE,QAAQ;AAAiB,CAAC;IAEtC,MAAM,EAAEW,MAAM,EAAE,GAAGX;IACnB,IAAI;QACF,OAAOgB,cAAcD,KAAKf;IAC5B,EAAE,OAAOiB,YAAY;QACnB,IAAI,CAACP,6BAA6BC,SAChC,MAAMM;QAGR,MAAMC,aAAaN,2BACjBO,yBAAyBJ;QAE3B,IAAI;YACF,OAAOjB,oBAAoBsB,KAAK,KAAK,CAACC,WAAWH,cAAclB;QACjE,EAAE,OAAOsB,OAAO;YACd,MAAMC,MACJ,CAAC,gDAAgD,EAAEC,OACjDF,SAASL,cAAc,iBACvB,gBAAgB,EAAEF,KAAK;QAE7B;IACF;AACF;AAKA,SAASU,0BAA0BZ,KAAc;IAC/C,MAAMa,OAAOC,8BAA8Bd;IAC3C,IAAI,AAAgB,YAAhB,OAAOa,MAAmB;QAC5BE,OACE,+BAA+B,IAAI,CAACF,KAAK,IAAI,KAC7C,CAAC,2CAA2C,EAAEA,MAAM;QAEtD,MAAMG,WAAWH,KAAK,KAAK,CAAC;QAC5B,IAAIG,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,MAAM;YACN,aAAa;gBACXC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;aACnB;QACH;QAEF,MAAM,IAAIN,MAAM,CAAC,2CAA2C,EAAEG,MAAM;IACtE;IAEA,IAAIK,WAAqB,EAAE;IAC3B,IAAI9B,MAAM,OAAO,CAACyB,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACxB;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAAC8B,GAAGC,EAAE,GAAG/B,KAAK,KAAK,CAAC;YAC1B6B,SAAS,IAAI,CAACD,OAAOE,EAAE,IAAI,KAAKF,OAAOG,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAO/B,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAAC8B,GAAGC,EAAE,GAAG/B,KAAK,KAAK,CAAC;YAC1B6B,SAAS,IAAI,CAACD,OAAOE,EAAE,IAAI,KAAKF,OAAOG,EAAE,IAAI;QAC/C,OACEF,SAAS,IAAI,CAACD,OAAO5B;IAEzB;SAEA6B,WAAWL;IAGb,IAAIK,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACL,MAAM;QACN,aAAa;YAACA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IACnE;IAGF,IACEA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QAAE,MAAM;QAAS,aAAa;YAACA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IAAC;IAGlE,IAAIL,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACL,MAAM;QACN,aAAa;YAACK,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IACnE;IAGF,MAAMG,MAAM,CAAC,oCAAoC,EAAEd,KAAK,SAAS,CAACM,MAAM,CAAC,CAAC;IAC1E,MAAM,IAAIH,MAAMW;AAClB;AAEA,SAASC,oBACPC,kBAAsC;IAEtC,OAAO;QACL,YAAYtB;QACZ,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2B,CAAC,EAAEuB,gBAAgB,EAAEC,UAAU,EAAE;gBAC1D,MAAMC,uBAAgD,CAAC;gBAEvD,IAAID,AAA2BE,WAA3BF,WAAW,WAAW,EACxBC,qBAAqB,WAAW,GAAGD,WAAW,WAAW;gBAG3D,OAAO;oBACL,QAAQ;wBACN,GAAGD,gBAAgB;wBACnB,GAAGE,oBAAoB;oBACzB;gBACF;YACF;QACF;QACA,UAAU;YACR,MAAM;YACN,cAAc;YACd,6BAA6B1C;YAC7B,SAAS4C,oBAAoBL;QAC/B;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAQ,OAAO;oBAAM,cAAc;gBAAK;gBAC9D,qBAAqBX;YACvB;QACF;IACF;AACF;AAEA,MAAMiB,wBAAwBP,oBAC5BQ,mBAAmB,cAAc;AAG5B,MAAMC,iBAAiB;IAC5B,eAAeT,oBAAoBQ,mBAAmB,IAAI;IAC1D,sBAAsBD;IACtB,0BAA0BA;AAC5B"}