{"version":3,"file":"ToolCallEngine.mjs","sources":["webpack://@ui-tars-test/agent-sdk/./src/ToolCallEngine.ts"],"sourcesContent":["import {\n  ToolCallEngine,\n  Tool,\n  ToolCallEnginePrepareRequestContext,\n  ChatCompletionCreateParams,\n  ChatCompletionAssistantMessageParam,\n  ChatCompletionChunk,\n  MultimodalToolCallResult,\n  AgentEventStream,\n  ChatCompletionMessageParam,\n  ChatCompletionMessageToolCall,\n  ParsedModelResponse,\n  StreamProcessingState,\n  StreamChunkResult,\n} from '@ui-tars-test/tarko-agent-interface';\nimport { DefaultActionParser } from '@ui-tars-test/action-parser';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { ConsoleLogger, LogLevel } from '@agent-infra/logger';\nimport { serializeAction } from '@ui-tars-test/shared/utils';\nimport { CustomActionParser } from '@ui-tars-test/shared/types';\n\nconst defaultParser = new DefaultActionParser();\nconst defaultLogger = new ConsoleLogger('[GUIAgent:ToolCallEngine]', LogLevel.DEBUG);\n\n/**\n * GUIAgentToolCallEngine - Minimal prompt engineering tool call engine\n *\n * This is the simplest possible implementation of a tool call engine that:\n * 1. Uses prompt engineering to instruct the LLM to output tool calls in a specific format\n * 2. Parses tool calls from LLM response text using simple regex matching\n * 3. Does not support streaming (focuses on core functionality only)\n *\n * Format used: <tool_call>{\"name\": \"tool_name\", \"arguments\": {...}}</tool_call>\n */\nexport class GUIAgentToolCallEngine extends ToolCallEngine {\n  private customActionParser?: CustomActionParser;\n\n  constructor(customActionParser?: CustomActionParser) {\n    super();\n    this.customActionParser = customActionParser;\n  }\n\n  /**\n   * Prepare system prompt with tool information and instructions\n   */\n  preparePrompt(instructions: string, tools: Tool[]): string {\n    return instructions;\n  }\n\n  /**\n   * Prepare request parameters for the LLM\n   *\n   * FIXME: move to base tool call engine.\n   */\n  prepareRequest(context: ToolCallEnginePrepareRequestContext): ChatCompletionCreateParams {\n    defaultLogger.log(\n      \"【New Sys Prompt'】 System Prompt:\",\n      JSON.stringify(context.messages.find((m) => m.role === 'system')?.content || ''),\n    );\n    return {\n      model: context.model,\n      messages: context.messages,\n      temperature: context.temperature || 0.7,\n      stream: true,\n      // When tools are undefined (disabled), tool_choice MUST also be undefined\n      // Otherwise OpenAI/Azure will return 400 Bad Request\n      tool_choice: undefined,\n      tools: undefined,\n    };\n  }\n\n  /**\n   * Initialize processing state (minimal implementation)\n   *\n   * FIXME: move to base tool call engine.\n   */\n  initStreamProcessingState(): StreamProcessingState {\n    return {\n      contentBuffer: '',\n      toolCalls: [],\n      reasoningBuffer: '',\n      finishReason: null,\n    };\n  }\n\n  /**\n   * Process streaming chunks - simply accumulate content\n   *\n   * FIXME: make it optional\n   */\n  processStreamingChunk(\n    chunk: ChatCompletionChunk,\n    state: StreamProcessingState,\n  ): StreamChunkResult {\n    // For non-streaming requests, the entire response comes in one chunk\n    const delta = chunk.choices[0]?.delta;\n    // eslint-disable-next-line @typescript-eslint/no-explicit-any\n    const message = (chunk.choices[0] as any)?.message;\n\n    // Accumulate content from delta (streaming) or message (non-streaming)\n    const content = delta?.content || message?.content || '';\n    if (content) {\n      state.contentBuffer += content;\n    }\n\n    // Record finish reason\n    if (chunk.choices[0]?.finish_reason) {\n      state.finishReason = chunk.choices[0].finish_reason;\n    }\n\n    // Return incremental content without tool call detection during streaming\n    return {\n      content: content,\n      reasoningContent: '',\n      hasToolCallUpdate: false,\n      toolCalls: [],\n    };\n  }\n\n  /**\n   * Generate a tool call ID\n   */\n  private generateToolCallId(): string {\n    return `call_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;\n  }\n\n  /**\n   * Extract tool calls from complete response text\n   */\n  finalizeStreamProcessing(state: StreamProcessingState): ParsedModelResponse {\n    const fullContent = state.contentBuffer;\n    console.log('[DEBUG] Full content length:', fullContent.length);\n    console.log('[DEBUG] Full content prefix:', fullContent.slice(0, 100));\n    console.log('[DEBUG] Full content suffix:', fullContent.slice(-100));\n    defaultLogger.log(\"【New Sys Prompt'】 Model Response:\", fullContent);\n    defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);\n\n    // Add explicit log to confirm XML parsing intent\n    console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);\n\n    // Try custom action parser first if available\n    let parsedGUIResponse = null;\n    if (this.customActionParser) {\n      parsedGUIResponse = this.customActionParser(fullContent);\n      defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');\n      console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');\n    }\n\n    // Priority: Custom Regex Parser > Default Parser\n    // Check if the content contains the specific XML format with dynamic suffixes\n    if (/<seed:tool_call_never_used_/.test(fullContent)) {\n      console.log(\n        '[CLI DEBUG] [ToolCallEngine] Detected custom XML format. Attempting custom regex parser.',\n      );\n      try {\n        // eslint-disable-next-line @typescript-eslint/no-explicit-any\n        const actions: any[] = [];\n        // Regex to match <function_...=name>...</function...>\n        // Handles dynamic suffixes like _never_used_...\n        const functionRegex = /<function_[^=>]*=([a-zA-Z0-9_]+)>([\\s\\S]*?)<\\/function_[^>]*>/g;\n        let match;\n\n        while ((match = functionRegex.exec(fullContent)) !== null) {\n          const actionName = match[1];\n          const innerContent = match[2];\n          // eslint-disable-next-line @typescript-eslint/no-explicit-any\n          const args: any = {};\n\n          // Regex to match <parameter_...=key>value</parameter...>\n          const paramRegex = /<parameter_[^=>]*=([a-zA-Z0-9_]+)>([\\s\\S]*?)<\\/parameter_[^>]*>/g;\n          let paramMatch;\n          while ((paramMatch = paramRegex.exec(innerContent)) !== null) {\n            const key = paramMatch[1];\n            // eslint-disable-next-line @typescript-eslint/no-explicit-any\n            let value: any = paramMatch[2].trim();\n\n            // Special handling for coordinate parameters (point, start_point, end_point)\n            // Convert string \"x y\" to { raw: { x, y } } object structure expected by Operators\n            if (\n              ['point', 'start_point', 'end_point', 'start', 'end'].includes(key) &&\n              typeof value === 'string'\n            ) {\n              // Remove potential wrapping tags like <point>...</point>\n              value = value.replace(/^<[^>]+>([\\s\\S]*?)<\\/[^>]+>$/, '$1').trim();\n\n              // Match coordinates pattern: digits followed by comma or space followed by digits\n              // Supports: \"100 200\", \"100, 200\", \"(100, 200)\"\n              const coordsMatch = value.match(/(\\d+)[, ]+(\\d+)/);\n              if (coordsMatch) {\n                const x = parseInt(coordsMatch[1], 10);\n                const y = parseInt(coordsMatch[2], 10);\n                value = { raw: { x, y } };\n              }\n            }\n\n            args[key] = value;\n          }\n\n          actions.push({\n            type: actionName,\n            inputs: args,\n            thought: '',\n          });\n        }\n\n        // Also try to extract thought from <think_...>...</think_...>\n        const thoughtRegex = /<think_[^>]*>([\\s\\S]*?)<\\/think_[^>]*>/g;\n        const thoughtMatch = thoughtRegex.exec(fullContent);\n        const thoughtContent = thoughtMatch ? thoughtMatch[1].trim() : '';\n\n        if (actions.length > 0) {\n          console.log(\n            `[CLI DEBUG] [ToolCallEngine] Custom regex parser found ${actions.length} actions.`,\n          );\n          parsedGUIResponse = {\n            errorMessage: '',\n            rawContent: fullContent,\n            actions: actions,\n            reasoningContent: thoughtContent,\n          };\n        }\n      } catch (e) {\n        console.error('[CLI DEBUG] [ToolCallEngine] Custom regex parser error:', e);\n      }\n    }\n\n    // Fall back to default parser if regex parser didn't produce results\n    if (!parsedGUIResponse) {\n      console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');\n      parsedGUIResponse = defaultParser.parsePrediction(fullContent);\n      defaultLogger.log('[finalizeStreamProcessing] Using default action parser');\n    }\n\n    if (parsedGUIResponse) {\n      console.log(\n        '[CLI DEBUG] [ToolCallEngine] Parsed response:',\n        JSON.stringify(parsedGUIResponse, null, 2),\n      );\n    } else {\n      console.log('[CLI DEBUG] [ToolCallEngine] Parsing failed or returned null');\n    }\n\n    if (!parsedGUIResponse || parsedGUIResponse.errorMessage) {\n      return {\n        content: '',\n        rawContent: fullContent,\n        toolCalls: [\n          {\n            id: this.generateToolCallId(),\n            type: 'function',\n            function: {\n              name: GUI_ADAPTED_TOOL_NAME,\n              arguments: JSON.stringify({\n                action: '',\n                step: '',\n                thought: '',\n                operator_action: null,\n                errorMessage:\n                  parsedGUIResponse?.errorMessage ?? 'Failed to parse GUI Action from output',\n              }),\n            },\n          },\n        ],\n        finishReason: 'tool_calls',\n      };\n    }\n\n    const toolCalls: ChatCompletionMessageToolCall[] = [];\n\n    let finished = false;\n    let finishMessage: string | null = null;\n    for (const action of parsedGUIResponse.actions) {\n      if (action.type === 'finished') {\n        finished = true;\n        finishMessage = action.inputs?.content ?? null;\n        continue;\n      }\n      toolCalls.push({\n        id: this.generateToolCallId(),\n        type: 'function',\n        function: {\n          name: GUI_ADAPTED_TOOL_NAME,\n          arguments: JSON.stringify({\n            action: serializeAction(action),\n            step: '',\n            thought: parsedGUIResponse.reasoningContent ?? '',\n            operator_action: action,\n          }),\n        },\n      });\n    }\n\n    const content = finishMessage ?? '';\n    const reasoningContent = parsedGUIResponse.reasoningContent ?? '';\n    const contentForWebUI = content.replace(/\\\\n|\\n/g, '<br>');\n    const reasoningContentForWebUI = reasoningContent.replace(/\\\\n|\\n/g, '<br>');\n\n    // No tool calls found - return regular response\n    return {\n      content: contentForWebUI,\n      rawContent: fullContent,\n      reasoningContent: reasoningContentForWebUI,\n      toolCalls,\n      finishReason: toolCalls.length > 0 && !finished ? 'tool_calls' : 'stop',\n    };\n  }\n\n  /**\n   * Build assistant message for conversation history\n   * For PE engines, we preserve the raw content including tool call markup\n   *\n   * FIXME: move to base tool call engine.\n   */\n  buildHistoricalAssistantMessage(\n    currentLoopAssistantEvent: AgentEventStream.AssistantMessageEvent,\n  ): ChatCompletionAssistantMessageParam {\n    return {\n      role: 'assistant',\n      content: currentLoopAssistantEvent.rawContent || currentLoopAssistantEvent.content,\n    };\n  }\n\n  /**\n   * Build tool result messages as user messages\n   * PE engines format tool results as user input for next iteration\n   *\n   * FIXME: move to base tool call engine.\n   */\n  buildHistoricalToolCallResultMessages(\n    toolCallResults: MultimodalToolCallResult[],\n  ): ChatCompletionMessageParam[] {\n    return toolCallResults.map((result) => {\n      // Extract text content from multimodal result\n      const textContent = result.content\n        .filter((part) => part.type === 'text')\n        .map((part) => (part as { text: string }).text)\n        .join('');\n\n      return {\n        role: 'user',\n        content: `Tool \"${result.toolName}\" result:\\n${textContent}`,\n      };\n    });\n  }\n}\n"],"names":["defaultParser","DefaultActionParser","defaultLogger","ConsoleLogger","LogLevel","GUIAgentToolCallEngine","ToolCallEngine","instructions","tools","context","_context_messages_find","JSON","m","undefined","chunk","state","_chunk_choices_","_chunk_choices_1","_chunk_choices_2","delta","message","content","Date","Math","fullContent","console","parsedGUIResponse","actions","functionRegex","match","actionName","innerContent","args","paramRegex","paramMatch","key","value","coordsMatch","x","parseInt","y","thoughtRegex","thoughtMatch","thoughtContent","e","GUI_ADAPTED_TOOL_NAME","toolCalls","finished","finishMessage","action","_action_inputs","serializeAction","reasoningContent","contentForWebUI","reasoningContentForWebUI","currentLoopAssistantEvent","toolCallResults","result","textContent","part","customActionParser"],"mappings":";;;;;;;;;;;;;;;;;;;AAqBA,MAAMA,gBAAgB,IAAIC;AAC1B,MAAMC,gBAAgB,IAAIC,cAAc,6BAA6BC,SAAS,KAAK;AAY5E,MAAMC,+BAA+BC;IAW1C,cAAcC,YAAoB,EAAEC,KAAa,EAAU;QACzD,OAAOD;IACT;IAOA,eAAeE,OAA4C,EAA8B;YAGtEC;QAFjBR,cAAc,GAAG,CACf,8CACAS,KAAK,SAAS,CAACD,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,QAAQ,CAAC,IAAI,CAAC,CAACE,IAAMA,AAAW,aAAXA,EAAE,IAAI,CAAa,IAAhDF,KAAAA,IAAAA,uBAAmD,OAAO,AAAD,KAAK;QAE/E,OAAO;YACL,OAAOD,QAAQ,KAAK;YACpB,UAAUA,QAAQ,QAAQ;YAC1B,aAAaA,QAAQ,WAAW,IAAI;YACpC,QAAQ;YAGR,aAAaI;YACb,OAAOA;QACT;IACF;IAOA,4BAAmD;QACjD,OAAO;YACL,eAAe;YACf,WAAW,EAAE;YACb,iBAAiB;YACjB,cAAc;QAChB;IACF;IAOA,sBACEC,KAA0B,EAC1BC,KAA4B,EACT;YAELC,iBAEGC,kBASbC;QAXJ,MAAMC,QAAQ,QAAAH,CAAAA,kBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,gBAAkB,KAAK;QAErC,MAAMI,UAAU,QAACH,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAA0B,OAAO;QAGlD,MAAMI,UAAUF,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,AAAD,KAAKC,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD,KAAK;QACtD,IAAIC,SACFN,MAAM,aAAa,IAAIM;QAIzB,IAAI,QAAAH,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAAkB,aAAa,EACjCH,MAAM,YAAY,GAAGD,MAAM,OAAO,CAAC,EAAE,CAAC,aAAa;QAIrD,OAAO;YACL,SAASO;YACT,kBAAkB;YAClB,mBAAmB;YACnB,WAAW,EAAE;QACf;IACF;IAKQ,qBAA6B;QACnC,OAAO,CAAC,KAAK,EAAEC,KAAK,GAAG,GAAG,CAAC,EAAEC,KAAK,MAAM,GAAG,QAAQ,CAAC,IAAI,SAAS,CAAC,GAAG,KAAK;IAC5E;IAKA,yBAAyBR,KAA4B,EAAuB;QAC1E,MAAMS,cAAcT,MAAM,aAAa;QACvCU,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,MAAM;QAC9DC,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,KAAK,CAAC,GAAG;QACjEC,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,KAAK,CAAC;QAC9DtB,cAAc,GAAG,CAAC,+CAAqCsB;QACvDtB,cAAc,GAAG,CAAC,0CAA0CsB;QAG5DC,QAAQ,GAAG,CAAC,8DAA8DD;QAG1E,IAAIE,oBAAoB;QACxB,IAAI,IAAI,CAAC,kBAAkB,EAAE;YAC3BA,oBAAoB,IAAI,CAAC,kBAAkB,CAACF;YAC5CtB,cAAc,GAAG,CAAC;YAClBuB,QAAQ,GAAG,CAAC;QACd;QAIA,IAAI,8BAA8B,IAAI,CAACD,cAAc;YACnDC,QAAQ,GAAG,CACT;YAEF,IAAI;gBAEF,MAAME,UAAiB,EAAE;gBAGzB,MAAMC,gBAAgB;gBACtB,IAAIC;gBAEJ,MAAQA,AAA6C,SAA7CA,CAAAA,QAAQD,cAAc,IAAI,CAACJ,YAAW,EAAa;oBACzD,MAAMM,aAAaD,KAAK,CAAC,EAAE;oBAC3B,MAAME,eAAeF,KAAK,CAAC,EAAE;oBAE7B,MAAMG,OAAY,CAAC;oBAGnB,MAAMC,aAAa;oBACnB,IAAIC;oBACJ,MAAQA,AAAgD,SAAhDA,CAAAA,aAAaD,WAAW,IAAI,CAACF,aAAY,EAAa;wBAC5D,MAAMI,MAAMD,UAAU,CAAC,EAAE;wBAEzB,IAAIE,QAAaF,UAAU,CAAC,EAAE,CAAC,IAAI;wBAInC,IACE;4BAAC;4BAAS;4BAAe;4BAAa;4BAAS;yBAAM,CAAC,QAAQ,CAACC,QAC/D,AAAiB,YAAjB,OAAOC,OACP;4BAEAA,QAAQA,MAAM,OAAO,CAAC,gCAAgC,MAAM,IAAI;4BAIhE,MAAMC,cAAcD,MAAM,KAAK,CAAC;4BAChC,IAAIC,aAAa;gCACf,MAAMC,IAAIC,SAASF,WAAW,CAAC,EAAE,EAAE;gCACnC,MAAMG,IAAID,SAASF,WAAW,CAAC,EAAE,EAAE;gCACnCD,QAAQ;oCAAE,KAAK;wCAAEE;wCAAGE;oCAAE;gCAAE;4BAC1B;wBACF;wBAEAR,IAAI,CAACG,IAAI,GAAGC;oBACd;oBAEAT,QAAQ,IAAI,CAAC;wBACX,MAAMG;wBACN,QAAQE;wBACR,SAAS;oBACX;gBACF;gBAGA,MAAMS,eAAe;gBACrB,MAAMC,eAAeD,aAAa,IAAI,CAACjB;gBACvC,MAAMmB,iBAAiBD,eAAeA,YAAY,CAAC,EAAE,CAAC,IAAI,KAAK;gBAE/D,IAAIf,QAAQ,MAAM,GAAG,GAAG;oBACtBF,QAAQ,GAAG,CACT,CAAC,uDAAuD,EAAEE,QAAQ,MAAM,CAAC,SAAS,CAAC;oBAErFD,oBAAoB;wBAClB,cAAc;wBACd,YAAYF;wBACZ,SAASG;wBACT,kBAAkBgB;oBACpB;gBACF;YACF,EAAE,OAAOC,GAAG;gBACVnB,QAAQ,KAAK,CAAC,2DAA2DmB;YAC3E;QACF;QAGA,IAAI,CAAClB,mBAAmB;YACtBD,QAAQ,GAAG,CAAC;YACZC,oBAAoB1B,cAAc,eAAe,CAACwB;YAClDtB,cAAc,GAAG,CAAC;QACpB;QAEA,IAAIwB,mBACFD,QAAQ,GAAG,CACT,iDACAd,KAAK,SAAS,CAACe,mBAAmB,MAAM;aAG1CD,QAAQ,GAAG,CAAC;QAGd,IAAI,CAACC,qBAAqBA,kBAAkB,YAAY,EACtD,OAAO;YACL,SAAS;YACT,YAAYF;YACZ,WAAW;gBACT;oBACE,IAAI,IAAI,CAAC,kBAAkB;oBAC3B,MAAM;oBACN,UAAU;wBACR,MAAMqB;wBACN,WAAWlC,KAAK,SAAS,CAAC;4BACxB,QAAQ;4BACR,MAAM;4BACN,SAAS;4BACT,iBAAiB;4BACjB,cACEe,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,YAAY,AAAD,KAAK;wBACvC;oBACF;gBACF;aACD;YACD,cAAc;QAChB;QAGF,MAAMoB,YAA6C,EAAE;QAErD,IAAIC,WAAW;QACf,IAAIC,gBAA+B;QACnC,KAAK,MAAMC,UAAUvB,kBAAkB,OAAO,CAAE;YAC9C,IAAIuB,AAAgB,eAAhBA,OAAO,IAAI,EAAiB;oBAEdC;gBADhBH,WAAW;gBACXC,gBAAgBE,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,OAAO,AAAD,KAAK;gBAC1C;YACF;YACAJ,UAAU,IAAI,CAAC;gBACb,IAAI,IAAI,CAAC,kBAAkB;gBAC3B,MAAM;gBACN,UAAU;oBACR,MAAMD;oBACN,WAAWlC,KAAK,SAAS,CAAC;wBACxB,QAAQwC,gBAAgBF;wBACxB,MAAM;wBACN,SAASvB,kBAAkB,gBAAgB,IAAI;wBAC/C,iBAAiBuB;oBACnB;gBACF;YACF;QACF;QAEA,MAAM5B,UAAU2B,iBAAiB;QACjC,MAAMI,mBAAmB1B,kBAAkB,gBAAgB,IAAI;QAC/D,MAAM2B,kBAAkBhC,QAAQ,OAAO,CAAC,WAAW;QACnD,MAAMiC,2BAA2BF,iBAAiB,OAAO,CAAC,WAAW;QAGrE,OAAO;YACL,SAASC;YACT,YAAY7B;YACZ,kBAAkB8B;YAClBR;YACA,cAAcA,UAAU,MAAM,GAAG,KAAK,CAACC,WAAW,eAAe;QACnE;IACF;IAQA,gCACEQ,yBAAiE,EAC5B;QACrC,OAAO;YACL,MAAM;YACN,SAASA,0BAA0B,UAAU,IAAIA,0BAA0B,OAAO;QACpF;IACF;IAQA,sCACEC,eAA2C,EACb;QAC9B,OAAOA,gBAAgB,GAAG,CAAC,CAACC;YAE1B,MAAMC,cAAcD,OAAO,OAAO,CAC/B,MAAM,CAAC,CAACE,OAASA,AAAc,WAAdA,KAAK,IAAI,EAC1B,GAAG,CAAC,CAACA,OAAUA,KAA0B,IAAI,EAC7C,IAAI,CAAC;YAER,OAAO;gBACL,MAAM;gBACN,SAAS,CAAC,MAAM,EAAEF,OAAO,QAAQ,CAAC,WAAW,EAAEC,aAAa;YAC9D;QACF;IACF;IAlTA,YAAYE,kBAAuC,CAAE;QACnD,KAAK,IAHP,uBAAQ,sBAAR;QAIE,IAAI,CAAC,kBAAkB,GAAGA;IAC5B;AAgTF"}