{"version":3,"file":"Model.mjs","sources":["webpack://@ui-tars/sdk/./src/Model.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport OpenAI, { type ClientOptions } from 'openai';\nimport {\n  type ChatCompletionCreateParamsNonStreaming,\n  type ChatCompletionCreateParamsBase,\n  type ChatCompletionMessageParam,\n} from 'openai/resources/chat/completions';\nimport { actionParser } from '@ui-tars/action-parser';\n\nimport { useContext } from './context/useContext';\nimport { Model, type InvokeParams, type InvokeOutput } from './types';\n\nimport {\n  preprocessResizeImage,\n  convertToOpenAIMessages,\n  convertToResponseApiInput,\n  isMessageImage,\n} from './utils';\nimport { DEFAULT_FACTORS } from './constants';\nimport {\n  UITarsModelVersion,\n  MAX_PIXELS_V1_0,\n  MAX_PIXELS_V1_5,\n  MAX_PIXELS_DOUBAO,\n} from '@ui-tars/shared/types';\nimport type {\n  ResponseCreateParamsNonStreaming,\n  ResponseInputItem,\n} from 'openai/resources/responses/responses';\n\ntype OpenAIChatCompletionCreateParams = Omit<ClientOptions, 'maxRetries'> &\n  Pick<\n    ChatCompletionCreateParamsBase,\n    'model' | 'max_tokens' | 'temperature' | 'top_p'\n  >;\n\nexport interface UITarsModelConfig extends OpenAIChatCompletionCreateParams {\n  /** Whether to use OpenAI Response API instead of Chat Completions API */\n  useResponsesApi?: boolean;\n}\n\nexport interface ThinkingVisionProModelConfig\n  extends ChatCompletionCreateParamsNonStreaming {\n  thinking?: {\n    type: 'enabled' | 'disabled';\n  };\n}\n\nexport class UITarsModel extends Model {\n  constructor(protected readonly modelConfig: UITarsModelConfig) {\n    super();\n    this.modelConfig = modelConfig;\n  }\n\n  get useResponsesApi(): boolean {\n    return this.modelConfig.useResponsesApi ?? false;\n  }\n  private headImageContext: {\n    messageIndex: number;\n    responseIds: string[];\n  } | null = null;\n\n  /** [widthFactor, heightFactor] */\n  get factors(): [number, number] {\n    return DEFAULT_FACTORS;\n  }\n\n  get modelName(): string {\n    return this.modelConfig.model ?? 'unknown';\n  }\n\n  /**\n   * reset the model state\n   */\n  reset() {\n    this.headImageContext = null;\n  }\n\n  /**\n   * call real LLM / VLM Model\n   * @param params\n   * @param options\n   * @returns\n   */\n  protected async invokeModelProvider(\n    uiTarsVersion: UITarsModelVersion = UITarsModelVersion.V1_0,\n    params: {\n      messages: Array<ChatCompletionMessageParam>;\n      previousResponseId?: string;\n    },\n    options: {\n      signal?: AbortSignal;\n    },\n    headers?: Record<string, string>,\n  ): Promise<{\n    prediction: string;\n    costTime?: number;\n    costTokens?: number;\n    responseId?: string;\n  }> {\n    const { logger } = useContext();\n    const { messages, previousResponseId } = params;\n    const {\n      baseURL,\n      apiKey,\n      model,\n      max_tokens = uiTarsVersion == UITarsModelVersion.V1_5 ? 65535 : 1000,\n      temperature = 0,\n      top_p = 0.7,\n      ...restOptions\n    } = this.modelConfig;\n\n    const openai = new OpenAI({\n      ...restOptions,\n      maxRetries: 0,\n      baseURL,\n      apiKey,\n    });\n\n    const createCompletionPrams: ChatCompletionCreateParamsNonStreaming = {\n      model,\n      messages,\n      stream: false,\n      seed: null,\n      stop: null,\n      frequency_penalty: null,\n      presence_penalty: null,\n      // custom options\n      max_tokens,\n      temperature,\n      top_p,\n    };\n\n    const createCompletionPramsThinkingVp: ThinkingVisionProModelConfig = {\n      ...createCompletionPrams,\n      thinking: {\n        type: 'disabled',\n      },\n    };\n\n    const startTime = Date.now();\n\n    if (this.modelConfig.useResponsesApi) {\n      const lastAssistantIndex = messages.findLastIndex(\n        (c) => c.role === 'assistant',\n      );\n      logger.info('[ResponseAPI] lastAssistantIndex: ', lastAssistantIndex);\n      // incremental messages\n      const inputs = convertToResponseApiInput(\n        lastAssistantIndex > -1\n          ? messages.slice(lastAssistantIndex + 1)\n          : messages,\n      );\n\n      // find the first image message\n      const headImageMessageIndex = messages.findIndex(isMessageImage);\n      if (\n        this.headImageContext?.responseIds.length &&\n        this.headImageContext?.messageIndex !== headImageMessageIndex\n      ) {\n        // The image window has slid. Delete the first image message.\n        logger.info(\n          '[ResponseAPI] should [delete]: ',\n          this.headImageContext,\n          'headImageMessageIndex',\n          headImageMessageIndex,\n        );\n        const headImageResponseId = this.headImageContext.responseIds.shift();\n\n        if (headImageResponseId) {\n          const deletedResponse = await openai.responses.delete(\n            headImageResponseId,\n            {\n              headers,\n            },\n          );\n          logger.info(\n            '[ResponseAPI] [deletedResponse]: ',\n            headImageResponseId,\n            deletedResponse,\n          );\n        }\n      }\n\n      let result;\n      let responseId = previousResponseId;\n      for (const input of inputs) {\n        const truncated = JSON.stringify(\n          [input],\n          (key, value) => {\n            if (typeof value === 'string' && value.startsWith('data:image/')) {\n              return value.slice(0, 50) + '...[truncated]';\n            }\n            return value;\n          },\n          2,\n        );\n        const responseParams: ResponseCreateParamsNonStreaming = {\n          input: [input],\n          model,\n          temperature,\n          top_p,\n          stream: false,\n          max_output_tokens: max_tokens,\n          ...(responseId && {\n            previous_response_id: responseId,\n          }),\n          // @ts-expect-error\n          thinking: {\n            type: 'disabled',\n          },\n        };\n        logger.info(\n          '[ResponseAPI] [input]: ',\n          truncated,\n          'previous_response_id',\n          responseParams?.previous_response_id,\n          'headImageMessageIndex',\n          headImageMessageIndex,\n        );\n\n        result = await openai.responses.create(responseParams, {\n          ...options,\n          timeout: 1000 * 30,\n          headers,\n        });\n        logger.info('[ResponseAPI] [result]: ', result);\n        responseId = result?.id;\n        logger.info('[ResponseAPI] [responseId]: ', responseId);\n\n        // head image changed\n        if (responseId && isMessageImage(input)) {\n          this.headImageContext = {\n            messageIndex: headImageMessageIndex,\n            responseIds: [\n              ...(this.headImageContext?.responseIds || []),\n              responseId,\n            ],\n          };\n        }\n\n        logger.info(\n          '[ResponseAPI] [headImageContext]: ',\n          this.headImageContext,\n        );\n      }\n\n      return {\n        prediction: result?.output_text ?? '',\n        costTime: Date.now() - startTime,\n        costTokens: result?.usage?.total_tokens ?? 0,\n        responseId,\n      };\n    }\n\n    // Use Chat Completions API if not using Response API\n    const result = await openai.chat.completions.create(\n      createCompletionPramsThinkingVp,\n      {\n        ...options,\n        timeout: 1000 * 30,\n        headers,\n      },\n    );\n\n    return {\n      prediction: result.choices?.[0]?.message?.content ?? '',\n      costTime: Date.now() - startTime,\n      costTokens: result.usage?.total_tokens ?? 0,\n    };\n  }\n\n  async invoke(params: InvokeParams): Promise<InvokeOutput> {\n    const {\n      conversations,\n      images,\n      screenContext,\n      scaleFactor,\n      uiTarsVersion,\n      headers,\n      previousResponseId,\n    } = params;\n    const { logger, signal } = useContext();\n\n    logger?.info(\n      `[UITarsModel] invoke: screenContext=${JSON.stringify(screenContext)}, scaleFactor=${scaleFactor}, uiTarsVersion=${uiTarsVersion}, useResponsesApi=${this.modelConfig.useResponsesApi}`,\n    );\n\n    const maxPixels =\n      uiTarsVersion === UITarsModelVersion.V1_5\n        ? MAX_PIXELS_V1_5\n        : uiTarsVersion === UITarsModelVersion.DOUBAO_1_5_15B ||\n            uiTarsVersion === UITarsModelVersion.DOUBAO_1_5_20B\n          ? MAX_PIXELS_DOUBAO\n          : MAX_PIXELS_V1_0;\n    const compressedImages = await Promise.all(\n      images.map((image) => preprocessResizeImage(image, maxPixels)),\n    );\n\n    const messages = convertToOpenAIMessages({\n      conversations,\n      images: compressedImages,\n    });\n\n    const startTime = Date.now();\n    const result = await this.invokeModelProvider(\n      uiTarsVersion,\n      {\n        messages,\n        previousResponseId,\n      },\n      {\n        signal,\n      },\n      headers,\n    )\n      .catch((e) => {\n        logger?.error('[UITarsModel] error', e);\n        throw e;\n      })\n      .finally(() => {\n        logger?.info(`[UITarsModel cost]: ${Date.now() - startTime}ms`);\n      });\n\n    if (!result.prediction) {\n      const err = new Error();\n      err.name = 'vlm response error';\n      err.stack = JSON.stringify(result) ?? 'no message';\n      logger?.error(err);\n      throw err;\n    }\n\n    const { prediction, costTime, costTokens, responseId } = result;\n\n    try {\n      const { parsed: parsedPredictions } = actionParser({\n        prediction,\n        factor: this.factors,\n        screenContext,\n        scaleFactor,\n        modelVer: uiTarsVersion,\n      });\n      return {\n        prediction,\n        parsedPredictions,\n        costTime,\n        costTokens,\n        responseId,\n      };\n    } catch (error) {\n      logger?.error('[UITarsModel] error', error);\n      return {\n        prediction,\n        parsedPredictions: [],\n        responseId,\n      };\n    }\n  }\n}\n"],"names":["UITarsModel","Model","DEFAULT_FACTORS","uiTarsVersion","UITarsModelVersion","params","options","headers","_result_choices__message","_result_usage","logger","useContext","messages","previousResponseId","baseURL","apiKey","model","max_tokens","temperature","top_p","restOptions","openai","OpenAI","createCompletionPrams","createCompletionPramsThinkingVp","startTime","Date","_this_headImageContext","_this_headImageContext1","_result_usage1","lastAssistantIndex","c","inputs","convertToResponseApiInput","headImageMessageIndex","isMessageImage","headImageResponseId","deletedResponse","result","responseId","input","truncated","JSON","key","value","responseParams","_this_headImageContext2","conversations","images","screenContext","scaleFactor","signal","maxPixels","MAX_PIXELS_V1_5","MAX_PIXELS_DOUBAO","MAX_PIXELS_V1_0","compressedImages","Promise","image","preprocessResizeImage","convertToOpenAIMessages","e","err","Error","prediction","costTime","costTokens","parsedPredictions","actionParser","error","modelConfig"],"mappings":";;;;;;;;;;;AAGC;;;;;;;;;;AAgDM,MAAMA,oBAAoBC;IAM/B,IAAI,kBAA2B;QAC7B,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,IAAI;IAC7C;IAOA,IAAI,UAA4B;QAC9B,OAAOC;IACT;IAEA,IAAI,YAAoB;QACtB,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,IAAI;IACnC;IAKA,QAAQ;QACN,IAAI,CAAC,gBAAgB,GAAG;IAC1B;IAQA,MAAgB,oBACdC,gBAAoCC,mBAAmB,IAAI,EAC3DC,MAGC,EACDC,OAEC,EACDC,OAAgC,EAM/B;YAuKaC,0BAAAA,kBAAAA,iBAEAC;QAxKd,MAAM,EAAEC,MAAM,EAAE,GAAGC;QACnB,MAAM,EAAEC,QAAQ,EAAEC,kBAAkB,EAAE,GAAGR;QACzC,MAAM,EACJS,OAAO,EACPC,MAAM,EACNC,KAAK,EACLC,aAAad,iBAAiBC,mBAAmB,IAAI,GAAG,QAAQ,IAAI,EACpEc,cAAc,CAAC,EACfC,QAAQ,GAAG,EACX,GAAGC,aACJ,GAAG,IAAI,CAAC,WAAW;QAEpB,MAAMC,SAAS,IAAIC,SAAO;YACxB,GAAGF,WAAW;YACd,YAAY;YACZN;YACAC;QACF;QAEA,MAAMQ,wBAAgE;YACpEP;YACAJ;YACA,QAAQ;YACR,MAAM;YACN,MAAM;YACN,mBAAmB;YACnB,kBAAkB;YAElBK;YACAC;YACAC;QACF;QAEA,MAAMK,kCAAgE;YACpE,GAAGD,qBAAqB;YACxB,UAAU;gBACR,MAAM;YACR;QACF;QAEA,MAAME,YAAYC,KAAK,GAAG;QAE1B,IAAI,IAAI,CAAC,WAAW,CAAC,eAAe,EAAE;gBAelCC,wBACAC,yBA4FYC;YA3Gd,MAAMC,qBAAqBlB,SAAS,aAAa,CAC/C,CAACmB,IAAMA,AAAW,gBAAXA,EAAE,IAAI;YAEfrB,OAAO,IAAI,CAAC,sCAAsCoB;YAElD,MAAME,SAASC,0BACbH,qBAAqB,KACjBlB,SAAS,KAAK,CAACkB,qBAAqB,KACpClB;YAIN,MAAMsB,wBAAwBtB,SAAS,SAAS,CAACuB;YACjD,IACER,AAAAA,SAAAA,CAAAA,yBAAAA,IAAI,CAAC,gBAAgB,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,WAAW,CAAC,MAAM,AAAD,KACxCC,AAAAA,SAAAA,CAAAA,0BAAAA,IAAI,CAAC,gBAAgB,AAAD,IAApBA,KAAAA,IAAAA,wBAAuB,YAAY,AAAD,MAAMM,uBACxC;gBAEAxB,OAAO,IAAI,CACT,mCACA,IAAI,CAAC,gBAAgB,EACrB,yBACAwB;gBAEF,MAAME,sBAAsB,IAAI,CAAC,gBAAgB,CAAC,WAAW,CAAC,KAAK;gBAEnE,IAAIA,qBAAqB;oBACvB,MAAMC,kBAAkB,MAAMhB,OAAO,SAAS,CAAC,MAAM,CACnDe,qBACA;wBACE7B;oBACF;oBAEFG,OAAO,IAAI,CACT,qCACA0B,qBACAC;gBAEJ;YACF;YAEA,IAAIC;YACJ,IAAIC,aAAa1B;YACjB,KAAK,MAAM2B,SAASR,OAAQ;gBAC1B,MAAMS,YAAYC,KAAK,SAAS,CAC9B;oBAACF;iBAAM,EACP,CAACG,KAAKC;oBACJ,IAAI,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,UAAU,CAAC,gBAChD,OAAOA,MAAM,KAAK,CAAC,GAAG,MAAM;oBAE9B,OAAOA;gBACT,GACA;gBAEF,MAAMC,iBAAmD;oBACvD,OAAO;wBAACL;qBAAM;oBACdxB;oBACAE;oBACAC;oBACA,QAAQ;oBACR,mBAAmBF;oBACnB,GAAIsB,cAAc;wBAChB,sBAAsBA;oBACxB,CAAC;oBAED,UAAU;wBACR,MAAM;oBACR;gBACF;gBACA7B,OAAO,IAAI,CACT,2BACA+B,WACA,wBACAI,QAAAA,iBAAAA,KAAAA,IAAAA,eAAgB,oBAAoB,EACpC,yBACAX;gBAGFI,SAAS,MAAMjB,OAAO,SAAS,CAAC,MAAM,CAACwB,gBAAgB;oBACrD,GAAGvC,OAAO;oBACV,SAAS;oBACTC;gBACF;gBACAG,OAAO,IAAI,CAAC,4BAA4B4B;gBACxCC,aAAaD,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,EAAE;gBACvB5B,OAAO,IAAI,CAAC,gCAAgC6B;gBAG5C,IAAIA,cAAcJ,eAAeK,QAAQ;wBAI/BM;oBAHR,IAAI,CAAC,gBAAgB,GAAG;wBACtB,cAAcZ;wBACd,aAAa;+BACPY,AAAAA,SAAAA,CAAAA,0BAAAA,IAAI,CAAC,gBAAgB,AAAD,IAApBA,KAAAA,IAAAA,wBAAuB,WAAW,AAAD,KAAK,EAAE;4BAC5CP;yBACD;oBACH;gBACF;gBAEA7B,OAAO,IAAI,CACT,sCACA,IAAI,CAAC,gBAAgB;YAEzB;YAEA,OAAO;gBACL,YAAY4B,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,WAAW,AAAD,KAAK;gBACnC,UAAUZ,KAAK,GAAG,KAAKD;gBACvB,YAAYI,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,QAAAA,CAAAA,iBAAAA,OAAQ,KAAK,AAAD,IAAZA,KAAAA,IAAAA,eAAe,YAAY,AAAD,KAAK;gBAC3CU;YACF;QACF;QAGA,MAAMD,SAAS,MAAMjB,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,CACjDG,iCACA;YACE,GAAGlB,OAAO;YACV,SAAS;YACTC;QACF;QAGF,OAAO;YACL,YAAYC,AAAAA,SAAAA,CAAAA,kBAAAA,OAAO,OAAO,AAAD,IAAbA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAgB,CAAC,EAAE,AAAD,IAAlBA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAqB,OAAO,AAAD,IAA3BA,KAAAA,IAAAA,yBAA8B,OAAO,AAAD,KAAK;YACrD,UAAUkB,KAAK,GAAG,KAAKD;YACvB,YAAYhB,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,YAAY,AAAD,KAAK;QAC5C;IACF;IAEA,MAAM,OAAOJ,MAAoB,EAAyB;QACxD,MAAM,EACJ0C,aAAa,EACbC,MAAM,EACNC,aAAa,EACbC,WAAW,EACX/C,aAAa,EACbI,OAAO,EACPM,kBAAkB,EACnB,GAAGR;QACJ,MAAM,EAAEK,MAAM,EAAEyC,MAAM,EAAE,GAAGxC;QAE3BD,QAAAA,UAAAA,OAAQ,IAAI,CACV,CAAC,oCAAoC,EAAEgC,KAAK,SAAS,CAACO,eAAe,cAAc,EAAEC,YAAY,gBAAgB,EAAE/C,cAAc,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC,eAAe,EAAE;QAGzL,MAAMiD,YACJjD,kBAAkBC,mBAAmB,IAAI,GACrCiD,kBACAlD,kBAAkBC,mBAAmB,cAAc,IACjDD,kBAAkBC,mBAAmB,cAAc,GACnDkD,oBACAC;QACR,MAAMC,mBAAmB,MAAMC,QAAQ,GAAG,CACxCT,OAAO,GAAG,CAAC,CAACU,QAAUC,sBAAsBD,OAAON;QAGrD,MAAMxC,WAAWgD,wBAAwB;YACvCb;YACA,QAAQS;QACV;QAEA,MAAM/B,YAAYC,KAAK,GAAG;QAC1B,MAAMY,SAAS,MAAM,IAAI,CAAC,mBAAmB,CAC3CnC,eACA;YACES;YACAC;QACF,GACA;YACEsC;QACF,GACA5C,SAEC,KAAK,CAAC,CAACsD;YACNnD,QAAAA,UAAAA,OAAQ,KAAK,CAAC,uBAAuBmD;YACrC,MAAMA;QACR,GACC,OAAO,CAAC;YACPnD,QAAAA,UAAAA,OAAQ,IAAI,CAAC,CAAC,oBAAoB,EAAEgB,KAAK,GAAG,KAAKD,UAAU,EAAE,CAAC;QAChE;QAEF,IAAI,CAACa,OAAO,UAAU,EAAE;YACtB,MAAMwB,MAAM,IAAIC;YAChBD,IAAI,IAAI,GAAG;YACXA,IAAI,KAAK,GAAGpB,KAAK,SAAS,CAACJ,WAAW;YACtC5B,QAAAA,UAAAA,OAAQ,KAAK,CAACoD;YACd,MAAMA;QACR;QAEA,MAAM,EAAEE,UAAU,EAAEC,QAAQ,EAAEC,UAAU,EAAE3B,UAAU,EAAE,GAAGD;QAEzD,IAAI;YACF,MAAM,EAAE,QAAQ6B,iBAAiB,EAAE,GAAGC,aAAa;gBACjDJ;gBACA,QAAQ,IAAI,CAAC,OAAO;gBACpBf;gBACAC;gBACA,UAAU/C;YACZ;YACA,OAAO;gBACL6D;gBACAG;gBACAF;gBACAC;gBACA3B;YACF;QACF,EAAE,OAAO8B,OAAO;YACd3D,QAAAA,UAAAA,OAAQ,KAAK,CAAC,uBAAuB2D;YACrC,OAAO;gBACLL;gBACA,mBAAmB,EAAE;gBACrBzB;YACF;QACF;IACF;IApTA,YAA+B+B,WAA8B,CAAE;QAC7D,KAAK,mDAOP,uBAAQ,oBAAR,cAR+BA,WAAW,GAAXA,aAAAA,IAAAA,CAQvB,gBAAgB,GAGb;QATT,IAAI,CAAC,WAAW,GAAGA;IACrB;AAkTF"}