{"version":3,"file":"utils.mjs","sources":["webpack://@ui-tars/sdk/./src/utils.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport { Jimp } from 'jimp';\nimport { ChatCompletionMessageParam } from 'openai/resources/chat/completions';\n\nimport { IMAGE_PLACEHOLDER, MAX_IMAGE_LENGTH } from '@ui-tars/shared/constants';\nimport { Conversation, Message } from '@ui-tars/shared/types';\nimport { DEFAULT_FACTORS, type Factors } from './constants';\nimport {\n  ResponseInput,\n  ResponseInputImage,\n  ResponseInputItem,\n  ResponseInputText,\n} from 'openai/resources/responses/responses.js';\n\n/**\n * Parse box string to screen coordinates\n *\n *   e.g. '[0.131,0.25,0.131,0.25]' 2560x1440 -> { x: 335.36, y: 360 }\n *\n * @param boxStr box string\n * @param screenWidth screen width\n * @param screenHeight screen height\n * @param factors scaling factor, the training space of the target model.\n * @returns screen coordinates\n */\nexport const parseBoxToScreenCoords = ({\n  boxStr,\n  screenWidth,\n  screenHeight,\n  factors = DEFAULT_FACTORS,\n}: {\n  boxStr: string;\n  screenWidth: number;\n  screenHeight: number;\n  factors?: Factors;\n}) => {\n  if (!boxStr) {\n    return { x: null, y: null };\n  }\n  const coords = boxStr\n    .replace('[', '')\n    .replace(']', '')\n    .split(',')\n    .map((num) => parseFloat(num.trim()));\n\n  const [x1, y1, x2 = x1, y2 = y1] = coords;\n  const [widthFactor, heightFactor] = factors;\n\n  return {\n    x: Math.round(((x1 + x2) / 2) * screenWidth * widthFactor) / widthFactor,\n    y: Math.round(((y1 + y2) / 2) * screenHeight * heightFactor) / heightFactor,\n  };\n};\n\nexport const processVlmParams = (\n  conversations: Message[],\n  images: string[],\n  maxImageLength: number = MAX_IMAGE_LENGTH,\n): {\n  images: string[];\n  conversations: Message[];\n} => {\n  // Check if the images array exceeds the limit\n  // TODO: configurable max image length\n  if (images.length > maxImageLength) {\n    // Calculate the number of items to remove\n    const excessCount = images.length - maxImageLength;\n\n    // Remove excess images from the start\n    images = images.slice(excessCount);\n\n    // Remove corresponding conversations where \"value\" is \"<image>\"\n    let imageCountToRemove = excessCount;\n    conversations = conversations.filter((convo) => {\n      if (imageCountToRemove > 0 && convo.value === IMAGE_PLACEHOLDER) {\n        imageCountToRemove--;\n        return false;\n      }\n      return true;\n    });\n  }\n\n  // Return the processed result\n  return { images, conversations };\n};\n\nexport const toVlmModelFormat = ({\n  historyMessages,\n  conversations,\n  systemPrompt,\n}: {\n  historyMessages: Message[];\n  conversations: Conversation[];\n  systemPrompt: string;\n}): {\n  conversations: Message[];\n  images: string[];\n} => {\n  const USER_INSTRUCTION_MARKER = '## User Instruction';\n  const history = formatHistoryMessages(historyMessages);\n  return {\n    conversations: conversations.map((conv, idx) => {\n      if (idx === 0 && conv.from === 'human') {\n        let newValue = '';\n        if (systemPrompt.includes(USER_INSTRUCTION_MARKER)) {\n          const insertIndex = systemPrompt.lastIndexOf(USER_INSTRUCTION_MARKER);\n          const slicedPrefix = systemPrompt.slice(0, insertIndex);\n          const slicedSuffix = systemPrompt.slice(insertIndex);\n          newValue =\n            slicedPrefix +\n            (slicedPrefix.endsWith('\\n') ? '' : '\\n') +\n            history +\n            '\\n' +\n            slicedSuffix +\n            (slicedSuffix.endsWith('\\n') ? '' : '\\n') +\n            conv.value;\n        } else {\n          newValue = `${systemPrompt}\\n${history}\\n${USER_INSTRUCTION_MARKER}\\n${conv.value}`;\n        }\n        return {\n          from: conv.from,\n          value: newValue,\n        };\n      }\n      return {\n        from: conv.from,\n        value: conv.value,\n      };\n    }),\n    images: conversations\n      .filter(\n        (conv) => conv.value === IMAGE_PLACEHOLDER && !!conv.screenshotBase64,\n      )\n      .map((conv) => conv.screenshotBase64!),\n  };\n};\n\nexport const getSummary = (prediction: string) =>\n  prediction\n    .replace(/Reflection:[\\s\\S]*?(?=Action_Summary:|Action:|$)/g, '')\n    .trim();\n\n/**\n * convert conversations to OpenAI ChatCompletionMessageParam\n * @param conversations conversations\n * @param images images\n * @returns OpenAI ChatCompletionMessageParam\n */\nexport const convertToOpenAIMessages = ({\n  conversations,\n  images,\n}: {\n  conversations: Message[];\n  images: string[];\n}): Array<ChatCompletionMessageParam> => {\n  const messages: Array<ChatCompletionMessageParam> = [];\n  let imageIndex = 0;\n\n  conversations.forEach((conv) => {\n    if (conv.value === IMAGE_PLACEHOLDER) {\n      // handle image message\n      if (imageIndex < images.length) {\n        messages.push({\n          role: 'user',\n          content: [\n            {\n              type: 'image_url',\n              image_url: { url: `data:image/png;base64,${images[imageIndex]}` },\n            },\n          ],\n        });\n        imageIndex++;\n      }\n    } else {\n      // handle text message\n      messages.push({\n        role: conv.from === 'human' ? 'user' : 'assistant',\n        content: conv.value,\n      });\n    }\n  });\n\n  return messages;\n};\n\nexport function replaceBase64Prefix(base64: string) {\n  return base64.replace(/^data:image\\/\\w+;base64,/, '');\n}\n\nexport async function preprocessResizeImage(\n  image_base64: string,\n  maxPixels: number,\n): Promise<string> {\n  try {\n    const imageBuffer = Buffer.from(image_base64, 'base64');\n\n    const image = await Jimp.read(imageBuffer);\n    const { width, height } = image.bitmap;\n\n    const currentPixels = width * height;\n    if (currentPixels > maxPixels) {\n      const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n      const newWidth = Math.floor(width * resizeFactor);\n      const newHeight = Math.floor(height * resizeFactor);\n\n      const resized = await image\n        .resize({\n          w: newWidth,\n          h: newHeight,\n        })\n        .getBuffer('image/png', { quality: 60 });\n\n      return resized.toString('base64');\n    }\n\n    const base64 = await image.getBase64('image/png', { quality: 60 });\n\n    return replaceBase64Prefix(base64);\n  } catch (error) {\n    console.error('preprocessResizeImage error:', error);\n    throw error;\n  }\n}\n\nfunction formatHistoryMessages(messages: Message[]): string {\n  const lastMessages = messages.slice(-30);\n\n  const lines = lastMessages.map((msg) => {\n    const role = msg.from === 'human' ? 'human' : 'assistant';\n    return `${role}: ${msg.value}`;\n  });\n\n  // human: xxx, assistant: xxx.\n  // const formattedLines = lines.map((line) => {\n  //   if (line.startsWith('human:')) {\n  //     return line + ',';\n  //   } else {\n  //     return line + '.';\n  //   }\n  // });\n\n  return '## History Messages\\n' + lines.join('\\n') + '\\n';\n}\n\n/**\n * convert ChatCompletionMessageParam to Response API input\n * @param messages messages\n * @returns Response API input\n */\nexport const convertToResponseApiInput = (\n  messages: ChatCompletionMessageParam[],\n): ResponseInput => {\n  return messages.map((message) => {\n    if (Array.isArray(message?.content) && message?.content.length > 0) {\n      const content = message.content.map((item) => {\n        if (item.type === 'image_url' && item.image_url?.url) {\n          return {\n            type: 'input_image',\n            image_url: item.image_url.url,\n          } as ResponseInputImage;\n        }\n        return item;\n      });\n      return {\n        role: message.role,\n        content,\n      } as ResponseInputItem.Message;\n    }\n\n    return message as unknown as ResponseInputItem.Message;\n  });\n};\n\n/**\n * check if the message is an image message\n * @param c message\n * @returns true if the message is an image message\n */\nexport const isMessageImage = (\n  c: ChatCompletionMessageParam | ResponseInputItem,\n) =>\n  'role' in c &&\n  c.role === 'user' &&\n  Array.isArray(c.content) &&\n  c.content.some(\n    (item) =>\n      (item.type === 'image_url' && item.image_url?.url) ||\n      (item.type === 'input_image' && item.image_url),\n  );\n"],"names":["parseBoxToScreenCoords","boxStr","screenWidth","screenHeight","factors","DEFAULT_FACTORS","coords","num","parseFloat","x1","y1","x2","y2","widthFactor","heightFactor","Math","processVlmParams","conversations","images","maxImageLength","MAX_IMAGE_LENGTH","excessCount","imageCountToRemove","convo","IMAGE_PLACEHOLDER","toVlmModelFormat","historyMessages","systemPrompt","USER_INSTRUCTION_MARKER","history","formatHistoryMessages","conv","idx","newValue","insertIndex","slicedPrefix","slicedSuffix","getSummary","prediction","convertToOpenAIMessages","messages","imageIndex","replaceBase64Prefix","base64","preprocessResizeImage","image_base64","maxPixels","imageBuffer","Buffer","image","Jimp","width","height","currentPixels","resizeFactor","newWidth","newHeight","resized","error","console","lastMessages","lines","msg","role","convertToResponseApiInput","message","Array","content","item","_item_image_url","isMessageImage","c"],"mappings":";;;;;;;AA4BO,MAAMA,yBAAyB,CAAC,EACrCC,MAAM,EACNC,WAAW,EACXC,YAAY,EACZC,UAAUC,eAAe,EAM1B;IACC,IAAI,CAACJ,QACH,OAAO;QAAE,GAAG;QAAM,GAAG;IAAK;IAE5B,MAAMK,SAASL,OACZ,OAAO,CAAC,KAAK,IACb,OAAO,CAAC,KAAK,IACb,KAAK,CAAC,KACN,GAAG,CAAC,CAACM,MAAQC,WAAWD,IAAI,IAAI;IAEnC,MAAM,CAACE,IAAIC,IAAIC,KAAKF,EAAE,EAAEG,KAAKF,EAAE,CAAC,GAAGJ;IACnC,MAAM,CAACO,aAAaC,aAAa,GAAGV;IAEpC,OAAO;QACL,GAAGW,KAAK,KAAK,CAAGN,AAAAA,CAAAA,KAAKE,EAAC,IAAK,IAAKT,cAAcW,eAAeA;QAC7D,GAAGE,KAAK,KAAK,CAAGL,AAAAA,CAAAA,KAAKE,EAAC,IAAK,IAAKT,eAAeW,gBAAgBA;IACjE;AACF;AAEO,MAAME,mBAAmB,CAC9BC,eACAC,QACAC,iBAAyBC,gBAAgB;IAOzC,IAAIF,OAAO,MAAM,GAAGC,gBAAgB;QAElC,MAAME,cAAcH,OAAO,MAAM,GAAGC;QAGpCD,SAASA,OAAO,KAAK,CAACG;QAGtB,IAAIC,qBAAqBD;QACzBJ,gBAAgBA,cAAc,MAAM,CAAC,CAACM;YACpC,IAAID,qBAAqB,KAAKC,MAAM,KAAK,KAAKC,mBAAmB;gBAC/DF;gBACA,OAAO;YACT;YACA,OAAO;QACT;IACF;IAGA,OAAO;QAAEJ;QAAQD;IAAc;AACjC;AAEO,MAAMQ,mBAAmB,CAAC,EAC/BC,eAAe,EACfT,aAAa,EACbU,YAAY,EAKb;IAIC,MAAMC,0BAA0B;IAChC,MAAMC,UAAUC,sBAAsBJ;IACtC,OAAO;QACL,eAAeT,cAAc,GAAG,CAAC,CAACc,MAAMC;YACtC,IAAIA,AAAQ,MAARA,OAAaD,AAAc,YAAdA,KAAK,IAAI,EAAc;gBACtC,IAAIE,WAAW;gBACf,IAAIN,aAAa,QAAQ,CAACC,0BAA0B;oBAClD,MAAMM,cAAcP,aAAa,WAAW,CAACC;oBAC7C,MAAMO,eAAeR,aAAa,KAAK,CAAC,GAAGO;oBAC3C,MAAME,eAAeT,aAAa,KAAK,CAACO;oBACxCD,WACEE,eACCA,CAAAA,aAAa,QAAQ,CAAC,QAAQ,KAAK,IAAG,IACvCN,UACA,OACAO,eACCA,CAAAA,aAAa,QAAQ,CAAC,QAAQ,KAAK,IAAG,IACvCL,KAAK,KAAK;gBACd,OACEE,WAAW,GAAGN,aAAa,EAAE,EAAEE,QAAQ,EAAE,EAAED,wBAAwB,EAAE,EAAEG,KAAK,KAAK,EAAE;gBAErF,OAAO;oBACL,MAAMA,KAAK,IAAI;oBACf,OAAOE;gBACT;YACF;YACA,OAAO;gBACL,MAAMF,KAAK,IAAI;gBACf,OAAOA,KAAK,KAAK;YACnB;QACF;QACA,QAAQd,cACL,MAAM,CACL,CAACc,OAASA,KAAK,KAAK,KAAKP,qBAAqB,CAAC,CAACO,KAAK,gBAAgB,EAEtE,GAAG,CAAC,CAACA,OAASA,KAAK,gBAAgB;IACxC;AACF;AAEO,MAAMM,aAAa,CAACC,aACzBA,WACG,OAAO,CAAC,qDAAqD,IAC7D,IAAI;AAQF,MAAMC,0BAA0B,CAAC,EACtCtB,aAAa,EACbC,MAAM,EAIP;IACC,MAAMsB,WAA8C,EAAE;IACtD,IAAIC,aAAa;IAEjBxB,cAAc,OAAO,CAAC,CAACc;QACrB,IAAIA,KAAK,KAAK,KAAKP,mBAEjB;YAAA,IAAIiB,aAAavB,OAAO,MAAM,EAAE;gBAC9BsB,SAAS,IAAI,CAAC;oBACZ,MAAM;oBACN,SAAS;wBACP;4BACE,MAAM;4BACN,WAAW;gCAAE,KAAK,CAAC,sBAAsB,EAAEtB,MAAM,CAACuB,WAAW,EAAE;4BAAC;wBAClE;qBACD;gBACH;gBACAA;YACF;QAAA,OAGAD,SAAS,IAAI,CAAC;YACZ,MAAMT,AAAc,YAAdA,KAAK,IAAI,GAAe,SAAS;YACvC,SAASA,KAAK,KAAK;QACrB;IAEJ;IAEA,OAAOS;AACT;AAEO,SAASE,oBAAoBC,MAAc;IAChD,OAAOA,OAAO,OAAO,CAAC,4BAA4B;AACpD;AAEO,eAAeC,sBACpBC,YAAoB,EACpBC,SAAiB;IAEjB,IAAI;QACF,MAAMC,cAAcC,OAAO,IAAI,CAACH,cAAc;QAE9C,MAAMI,QAAQ,MAAMC,KAAK,IAAI,CAACH;QAC9B,MAAM,EAAEI,KAAK,EAAEC,MAAM,EAAE,GAAGH,MAAM,MAAM;QAEtC,MAAMI,gBAAgBF,QAAQC;QAC9B,IAAIC,gBAAgBP,WAAW;YAC7B,MAAMQ,eAAevC,KAAK,IAAI,CAAC+B,YAAYO;YAC3C,MAAME,WAAWxC,KAAK,KAAK,CAACoC,QAAQG;YACpC,MAAME,YAAYzC,KAAK,KAAK,CAACqC,SAASE;YAEtC,MAAMG,UAAU,MAAMR,MACnB,MAAM,CAAC;gBACN,GAAGM;gBACH,GAAGC;YACL,GACC,SAAS,CAAC,aAAa;gBAAE,SAAS;YAAG;YAExC,OAAOC,QAAQ,QAAQ,CAAC;QAC1B;QAEA,MAAMd,SAAS,MAAMM,MAAM,SAAS,CAAC,aAAa;YAAE,SAAS;QAAG;QAEhE,OAAOP,oBAAoBC;IAC7B,EAAE,OAAOe,OAAO;QACdC,QAAQ,KAAK,CAAC,gCAAgCD;QAC9C,MAAMA;IACR;AACF;AAEA,SAAS5B,sBAAsBU,QAAmB;IAChD,MAAMoB,eAAepB,SAAS,KAAK,CAAC;IAEpC,MAAMqB,QAAQD,aAAa,GAAG,CAAC,CAACE;QAC9B,MAAMC,OAAOD,AAAa,YAAbA,IAAI,IAAI,GAAe,UAAU;QAC9C,OAAO,GAAGC,KAAK,EAAE,EAAED,IAAI,KAAK,EAAE;IAChC;IAWA,OAAO,0BAA0BD,MAAM,IAAI,CAAC,QAAQ;AACtD;AAOO,MAAMG,4BAA4B,CACvCxB,WAEOA,SAAS,GAAG,CAAC,CAACyB;QACnB,IAAIC,MAAM,OAAO,CAACD,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,KAAKA,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,CAAC,MAAM,AAAD,IAAI,GAAG;YAClE,MAAME,UAAUF,QAAQ,OAAO,CAAC,GAAG,CAAC,CAACG;oBACFC;gBAAjC,IAAID,AAAc,gBAAdA,KAAK,IAAI,IAAK,SAAeC,CAAAA,kBAAAA,KAAK,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,GAAG,AAAD,GACjD,OAAO;oBACL,MAAM;oBACN,WAAWD,KAAK,SAAS,CAAC,GAAG;gBAC/B;gBAEF,OAAOA;YACT;YACA,OAAO;gBACL,MAAMH,QAAQ,IAAI;gBAClBE;YACF;QACF;QAEA,OAAOF;IACT;AAQK,MAAMK,iBAAiB,CAC5BC,IAEA,UAAUA,KACVA,AAAW,WAAXA,EAAE,IAAI,IACNL,MAAM,OAAO,CAACK,EAAE,OAAO,KACvBA,EAAE,OAAO,CAAC,IAAI,CACZ,CAACH;YAC+BC;eAA7BD,AAAc,gBAAdA,KAAK,IAAI,IAAK,SAAeC,CAAAA,kBAAAA,KAAK,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,GAAG,AAAD,KAC/CD,AAAc,kBAAdA,KAAK,IAAI,IAAsBA,KAAK,SAAS"}