{"version":3,"file":"ai-model/models/ui-tars/parser.mjs","sources":["../../../../../src/ai-model/models/ui-tars/parser.ts"],"sourcesContent":["import type { Size } from '@/types';\nimport type { UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { actionParser } from '@ui-tars/action-parser';\n\ntype ActionType =\n  | 'click'\n  | 'left_double'\n  | 'right_single'\n  | 'drag'\n  | 'type'\n  | 'hotkey'\n  | 'finished'\n  | 'scroll'\n  | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\n\nexport interface UiTarsParsedPlanningResponse {\n  rawResponse: string;\n  actions: ReturnType<typeof actionParser>['parsed'];\n}\n\nexport function parseUiTarsPlanningResponse(\n  rawResponse: string,\n  shotSize: Size,\n  uiTarsModelVersion: UITarsModelVersion,\n): UiTarsParsedPlanningResponse {\n  const convertedText = convertBboxToCoordinates(rawResponse);\n  const parseResult = actionParser({\n    prediction: convertedText,\n    factor: [1000, 1000],\n    screenContext: {\n      width: shotSize.width,\n      height: shotSize.height,\n    },\n    modelVer: uiTarsModelVersion,\n  });\n\n  debug(\n    'ui-tars modelVer',\n    uiTarsModelVersion,\n    ', parsed',\n    JSON.stringify(parseResult.parsed),\n  );\n\n  return {\n    rawResponse,\n    actions: parseResult.parsed,\n  };\n}\n\n/**\n * Converts bounding box notation to coordinate points.\n */\nfunction convertBboxToCoordinates(text: string): string {\n  const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n  function replaceMatch(\n    match: string,\n    x1: string,\n    y1: string,\n    x2: string,\n    y2: string,\n  ): string {\n    const x1Num = Number.parseInt(x1, 10);\n    const y1Num = Number.parseInt(y1, 10);\n    const x2Num = Number.parseInt(x2, 10);\n    const y2Num = Number.parseInt(y2, 10);\n\n    const x = Math.floor((x1Num + x2Num) / 2);\n    const y = Math.floor((y1Num + y2Num) / 2);\n\n    return `(${x},${y})`;\n  }\n\n  const cleanedText = text\n    .replace(/\\[EOS\\]/g, '')\n    .replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');\n  return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\ninterface BaseAction {\n  action_type: ActionType;\n  action_inputs: Record<string, any>;\n  reflection: string | null;\n  thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n  action_type: 'click';\n  action_inputs: {\n    start_box: string;\n  };\n}\n\ninterface DragAction extends BaseAction {\n  action_type: 'drag';\n  action_inputs: {\n    start_box: string;\n    end_box: string;\n  };\n}\n\ninterface WaitAction extends BaseAction {\n  action_type: 'wait';\n  action_inputs: {\n    time: string;\n  };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n  action_type: 'left_double';\n  action_inputs: {\n    start_box: string;\n  };\n}\n\ninterface RightSingleAction extends BaseAction {\n  action_type: 'right_single';\n  action_inputs: {\n    start_box: string;\n  };\n}\n\ninterface TypeAction extends BaseAction {\n  action_type: 'type';\n  action_inputs: {\n    content: string;\n  };\n}\n\ninterface HotkeyAction extends BaseAction {\n  action_type: 'hotkey';\n  action_inputs: {\n    key: string;\n  };\n}\n\ninterface ScrollAction extends BaseAction {\n  action_type: 'scroll';\n  action_inputs: {\n    direction: 'up' | 'down';\n  };\n}\n\ninterface FinishedAction extends BaseAction {\n  action_type: 'finished';\n  action_inputs: {\n    content?: string;\n  };\n}\n\nexport type Action =\n  | ClickAction\n  | LeftDoubleAction\n  | RightSingleAction\n  | DragAction\n  | TypeAction\n  | HotkeyAction\n  | ScrollAction\n  | FinishedAction\n  | WaitAction;\n"],"names":["debug","getDebug","parseUiTarsPlanningResponse","rawResponse","shotSize","uiTarsModelVersion","convertedText","convertBboxToCoordinates","parseResult","actionParser","JSON","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","Math","y","cleanedText"],"mappings":";;AAgBA,MAAMA,QAAQC,SAAS;AAOhB,SAASC,4BACdC,WAAmB,EACnBC,QAAc,EACdC,kBAAsC;IAEtC,MAAMC,gBAAgBC,yBAAyBJ;IAC/C,MAAMK,cAAcC,aAAa;QAC/B,YAAYH;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOF,SAAS,KAAK;YACrB,QAAQA,SAAS,MAAM;QACzB;QACA,UAAUC;IACZ;IAEAL,MACE,oBACAK,oBACA,YACAK,KAAK,SAAS,CAACF,YAAY,MAAM;IAGnC,OAAO;QACLL;QACA,SAASK,YAAY,MAAM;IAC7B;AACF;AAKA,SAASD,yBAAyBI,IAAY;IAC5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAEV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAElC,MAAMM,IAAIC,KAAK,KAAK,CAAEN,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMI,IAAID,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAEvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEE,EAAE,CAAC,CAAC;IACtB;IAEA,MAAMC,cAAchB,KACjB,OAAO,CAAC,YAAY,IACpB,OAAO,CAAC,2BAA2B;IACtC,OAAOgB,YAAY,OAAO,CAACf,SAASC,cAAc,IAAI;AACxD"}