{"version":3,"file":"computerUse.cjs","names":["z","ToolMessage"],"sources":["../../src/tools/computerUse.ts"],"sourcesContent":["/* oxlint-disable @typescript-eslint/no-explicit-any */\nimport { z } from \"zod/v4\";\nimport { OpenAI as OpenAIClient } from \"openai\";\nimport { tool, type DynamicStructuredTool } from \"@langchain/core/tools\";\nimport { type ToolRuntime } from \"@langchain/core/tools\";\nimport {\n  ToolMessage,\n  type AIMessage,\n  type BaseMessage,\n} from \"@langchain/core/messages\";\n\n/**\n * The type of computer environment to control.\n */\nexport type ComputerUseEnvironment =\n  | \"browser\"\n  | \"mac\"\n  | \"windows\"\n  | \"linux\"\n  | \"ubuntu\";\n\n/**\n * Re-export action types from OpenAI SDK for convenience.\n */\nexport type ComputerUseClickAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Click;\nexport type ComputerUseDoubleClickAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.DoubleClick;\nexport type ComputerUseDragAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Drag;\nexport type ComputerUseKeypressAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Keypress;\nexport type ComputerUseMoveAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Move;\nexport type ComputerUseScreenshotAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Screenshot;\nexport type ComputerUseScrollAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Scroll;\nexport type ComputerUseTypeAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Type;\nexport type ComputerUseWaitAction =\n  OpenAIClient.Responses.ResponseComputerToolCall.Wait;\n\n/**\n * Union type of all computer use actions from OpenAI SDK.\n */\nexport type ComputerUseAction =\n  OpenAIClient.Responses.ResponseComputerToolCall[\"action\"];\n\n// Zod schemas for computer use actions\nconst ComputerUseScreenshotActionSchema = z.object({\n  type: z.literal(\"screenshot\"),\n});\n\nconst ComputerUseClickActionSchema = z.object({\n  type: z.literal(\"click\"),\n  x: z.number(),\n  y: z.number(),\n  button: z.enum([\"left\", \"right\", \"wheel\", \"back\", \"forward\"]).default(\"left\"),\n});\n\nconst ComputerUseDoubleClickActionSchema = z.object({\n  type: z.literal(\"double_click\"),\n  x: z.number(),\n  y: z.number(),\n  button: z.enum([\"left\", \"right\", \"wheel\", \"back\", \"forward\"]).default(\"left\"),\n});\n\nconst ComputerUseDragActionSchema = z.object({\n  type: z.literal(\"drag\"),\n  path: z.array(z.object({ x: z.number(), y: z.number() })),\n});\n\nconst ComputerUseKeypressActionSchema = z.object({\n  type: z.literal(\"keypress\"),\n  keys: z.array(z.string()),\n});\n\nconst ComputerUseMoveActionSchema = z.object({\n  type: z.literal(\"move\"),\n  x: z.number(),\n  y: z.number(),\n});\n\nconst ComputerUseScrollActionSchema = z.object({\n  type: z.literal(\"scroll\"),\n  x: z.number(),\n  y: z.number(),\n  scroll_x: z.number(),\n  scroll_y: z.number(),\n});\n\nconst ComputerUseTypeActionSchema = z.object({\n  type: z.literal(\"type\"),\n  text: z.string(),\n});\n\nconst ComputerUseWaitActionSchema = z.object({\n  type: z.literal(\"wait\"),\n  duration: z.number().optional(),\n});\n\n// Union schema for individual action types\nconst ComputerUseActionUnionSchema = z.union([\n  ComputerUseScreenshotActionSchema,\n  ComputerUseClickActionSchema,\n  ComputerUseDoubleClickActionSchema,\n  ComputerUseDragActionSchema,\n  ComputerUseKeypressActionSchema,\n  ComputerUseMoveActionSchema,\n  ComputerUseScrollActionSchema,\n  ComputerUseTypeActionSchema,\n  ComputerUseWaitActionSchema,\n]);\n\n// Schema for the input structure received from parseComputerCall\n// The action is wrapped in an `action` property: { action: { type: 'screenshot' } }\nexport const ComputerUseActionSchema = z.object({\n  action: ComputerUseActionUnionSchema,\n});\n\n// TypeScript types derived from Zod schemas\nexport type ComputerUseScreenshotActionType = z.infer<\n  typeof ComputerUseScreenshotActionSchema\n>;\nexport type ComputerUseClickActionType = z.infer<\n  typeof ComputerUseClickActionSchema\n>;\nexport type ComputerUseDoubleClickActionType = z.infer<\n  typeof ComputerUseDoubleClickActionSchema\n>;\nexport type ComputerUseDragActionType = z.infer<\n  typeof ComputerUseDragActionSchema\n>;\nexport type ComputerUseKeypressActionType = z.infer<\n  typeof ComputerUseKeypressActionSchema\n>;\nexport type ComputerUseMoveActionType = z.infer<\n  typeof ComputerUseMoveActionSchema\n>;\nexport type ComputerUseScrollActionType = z.infer<\n  typeof ComputerUseScrollActionSchema\n>;\nexport type ComputerUseTypeActionType = z.infer<\n  typeof ComputerUseTypeActionSchema\n>;\nexport type ComputerUseWaitActionType = z.infer<\n  typeof ComputerUseWaitActionSchema\n>;\n\n/**\n * Input structure for the Computer Use tool.\n * The action is wrapped in an `action` property.\n */\nexport interface ComputerUseInput {\n  action: ComputerUseAction;\n}\n\nexport type ComputerUseReturnType =\n  | string\n  | Promise<string>\n  | ToolMessage<any>\n  | Promise<ToolMessage<any>>;\n\n/**\n * Options for the Computer Use tool.\n */\nexport interface ComputerUseOptions {\n  /**\n   * The width of the computer display in pixels.\n   */\n  displayWidth: number;\n\n  /**\n   * The height of the computer display in pixels.\n   */\n  displayHeight: number;\n\n  /**\n   * The type of computer environment to control.\n   * - `browser`: Browser automation (recommended for most use cases)\n   * - `mac`: macOS environment\n   * - `windows`: Windows environment\n   * - `linux`: Linux environment\n   * - `ubuntu`: Ubuntu environment\n   */\n  environment: ComputerUseEnvironment;\n\n  /**\n   * Execute function that handles computer action execution.\n   * This function receives the action input and should return a base64-encoded\n   * screenshot of the result.\n   */\n  execute: (\n    action: ComputerUseAction,\n    runtime: ToolRuntime<any, any>\n  ) => ComputerUseReturnType;\n}\n\n/**\n * OpenAI Computer Use tool type for the Responses API.\n */\nexport type ComputerUseTool = OpenAIClient.Responses.ComputerUsePreviewTool;\n\nconst TOOL_NAME = \"computer_use\";\n\n/**\n * Creates a Computer Use tool that allows models to control computer interfaces\n * and perform tasks by simulating mouse clicks, keyboard input, scrolling, and more.\n *\n * **Computer Use** is a practical application of OpenAI's Computer-Using Agent (CUA)\n * model (`computer-use-preview`), which combines vision capabilities with advanced\n * reasoning to simulate controlling computer interfaces.\n *\n * **How it works**:\n * The tool operates in a continuous loop:\n * 1. Model sends computer actions (click, type, scroll, etc.)\n * 2. Your code executes these actions in a controlled environment\n * 3. You capture a screenshot of the result\n * 4. Send the screenshot back to the model\n * 5. Repeat until the task is complete\n *\n * **Important**: Computer use is in beta and requires careful consideration:\n * - Use in sandboxed environments only\n * - Do not use for high-stakes or authenticated tasks\n * - Always implement human-in-the-loop for important decisions\n * - Handle safety checks appropriately\n *\n * @see {@link https://platform.openai.com/docs/guides/tools-computer-use | OpenAI Computer Use Documentation}\n *\n * @param options - Configuration options for the Computer Use tool\n * @returns A Computer Use tool that can be passed to `bindTools`\n *\n * @example\n * ```typescript\n * import { ChatOpenAI, tools } from \"@langchain/openai\";\n *\n * const model = new ChatOpenAI({ model: \"computer-use-preview\" });\n *\n * // With execute callback for automatic action handling\n * const computer = tools.computerUse({\n *   displayWidth: 1024,\n *   displayHeight: 768,\n *   environment: \"browser\",\n *   execute: async (action) => {\n *     if (action.type === \"screenshot\") {\n *       return captureScreenshot();\n *     }\n *     if (action.type === \"click\") {\n *       await page.mouse.click(action.x, action.y, { button: action.button });\n *       return captureScreenshot();\n *     }\n *     if (action.type === \"type\") {\n *       await page.keyboard.type(action.text);\n *       return captureScreenshot();\n *     }\n *     // Handle other actions...\n *     return captureScreenshot();\n *   },\n * });\n *\n * const llmWithComputer = model.bindTools([computer]);\n * const response = await llmWithComputer.invoke(\n *   \"Check the latest news on bing.com\"\n * );\n * ```\n *\n * @example\n * ```typescript\n * // Without execute callback (manual action handling)\n * const computer = tools.computerUse({\n *   displayWidth: 1024,\n *   displayHeight: 768,\n *   environment: \"browser\",\n * });\n *\n * const response = await model.invoke(\"Check the news\", {\n *   tools: [computer],\n * });\n *\n * // Access the computer call from the response\n * const computerCall = response.additional_kwargs.tool_outputs?.find(\n *   (output) => output.type === \"computer_call\"\n * );\n * if (computerCall) {\n *   console.log(\"Action to execute:\", computerCall.action);\n *   // Execute the action manually, then send back a screenshot\n * }\n * ```\n *\n * @example\n * ```typescript\n * // For macOS desktop automation with Docker\n * const computer = tools.computerUse({\n *   displayWidth: 1920,\n *   displayHeight: 1080,\n *   environment: \"mac\",\n *   execute: async (action) => {\n *     if (action.type === \"click\") {\n *       await dockerExec(\n *         `DISPLAY=:99 xdotool mousemove ${action.x} ${action.y} click 1`,\n *         containerName\n *       );\n *     }\n *     // Capture screenshot from container\n *     return await getDockerScreenshot(containerName);\n *   },\n * });\n * ```\n *\n * @remarks\n * - Only available through the Responses API (not Chat Completions)\n * - Requires `computer-use-preview` model\n * - Actions include: click, double_click, drag, keypress, move, screenshot, scroll, type, wait\n * - Safety checks may be returned that require acknowledgment before proceeding\n * - Use `truncation: \"auto\"` parameter when making requests\n * - Recommended to use with `reasoning.summary` for debugging\n */\nexport function computerUse(options: ComputerUseOptions) {\n  const computerTool = tool(\n    async (\n      input: ComputerUseInput,\n      runtime: ToolRuntime<{ messages: BaseMessage[] }>\n    ) => {\n      /**\n       * get computer_use call id from runtime\n       */\n      const aiMessage = runtime.state?.messages.at(-1) as AIMessage | undefined;\n      const computerToolCall = aiMessage?.tool_calls?.find(\n        (tc) => tc.name === \"computer_use\"\n      );\n      const computerToolCallId = computerToolCall?.id;\n      if (!computerToolCallId) {\n        throw new Error(\"Computer use call id not found\");\n      }\n\n      const result = await options.execute(input.action, runtime);\n\n      /**\n       * make sure {@link ToolMessage} is returned with the correct additional kwargs\n       */\n      if (typeof result === \"string\") {\n        return new ToolMessage({\n          content: result,\n          tool_call_id: computerToolCallId,\n          additional_kwargs: {\n            type: \"computer_call_output\",\n          },\n        });\n      }\n\n      /**\n       * make sure {@link ToolMessage} is returned with the correct additional kwargs\n       */\n      return new ToolMessage({\n        ...result,\n        tool_call_id: computerToolCallId,\n        additional_kwargs: {\n          type: \"computer_call_output\",\n          ...result.additional_kwargs,\n        },\n      });\n    },\n    {\n      name: TOOL_NAME,\n      description:\n        \"Control a computer interface by executing mouse clicks, keyboard input, scrolling, and other actions.\",\n      schema: ComputerUseActionSchema,\n    }\n  );\n\n  computerTool.extras = {\n    ...(computerTool.extras ?? {}),\n    providerToolDefinition: {\n      type: \"computer_use_preview\",\n      display_width: options.displayWidth,\n      display_height: options.displayHeight,\n      environment: options.environment,\n    } satisfies ComputerUseTool,\n  };\n\n  /**\n   * return as typed {@link DynamicStructuredTool} so we don't get any type\n   * errors like \"can't export tool without reference\"\n   */\n  return computerTool as DynamicStructuredTool<\n    typeof ComputerUseActionSchema,\n    ComputerUseInput,\n    unknown,\n    ComputerUseReturnType\n  >;\n}\n"],"mappings":";;;;AAkDA,MAAM,oCAAoCA,OAAAA,EAAE,OAAO,EACjD,MAAMA,OAAAA,EAAE,QAAQ,aAAa,EAC9B,CAAC;AAEF,MAAM,+BAA+BA,OAAAA,EAAE,OAAO;CAC5C,MAAMA,OAAAA,EAAE,QAAQ,QAAQ;CACxB,GAAGA,OAAAA,EAAE,QAAQ;CACb,GAAGA,OAAAA,EAAE,QAAQ;CACb,QAAQA,OAAAA,EAAE,KAAK;EAAC;EAAQ;EAAS;EAAS;EAAQ;EAAU,CAAC,CAAC,QAAQ,OAAO;CAC9E,CAAC;AAEF,MAAM,qCAAqCA,OAAAA,EAAE,OAAO;CAClD,MAAMA,OAAAA,EAAE,QAAQ,eAAe;CAC/B,GAAGA,OAAAA,EAAE,QAAQ;CACb,GAAGA,OAAAA,EAAE,QAAQ;CACb,QAAQA,OAAAA,EAAE,KAAK;EAAC;EAAQ;EAAS;EAAS;EAAQ;EAAU,CAAC,CAAC,QAAQ,OAAO;CAC9E,CAAC;AAEF,MAAM,8BAA8BA,OAAAA,EAAE,OAAO;CAC3C,MAAMA,OAAAA,EAAE,QAAQ,OAAO;CACvB,MAAMA,OAAAA,EAAE,MAAMA,OAAAA,EAAE,OAAO;EAAE,GAAGA,OAAAA,EAAE,QAAQ;EAAE,GAAGA,OAAAA,EAAE,QAAQ;EAAE,CAAC,CAAC;CAC1D,CAAC;AAEF,MAAM,kCAAkCA,OAAAA,EAAE,OAAO;CAC/C,MAAMA,OAAAA,EAAE,QAAQ,WAAW;CAC3B,MAAMA,OAAAA,EAAE,MAAMA,OAAAA,EAAE,QAAQ,CAAC;CAC1B,CAAC;AAEF,MAAM,8BAA8BA,OAAAA,EAAE,OAAO;CAC3C,MAAMA,OAAAA,EAAE,QAAQ,OAAO;CACvB,GAAGA,OAAAA,EAAE,QAAQ;CACb,GAAGA,OAAAA,EAAE,QAAQ;CACd,CAAC;AAEF,MAAM,gCAAgCA,OAAAA,EAAE,OAAO;CAC7C,MAAMA,OAAAA,EAAE,QAAQ,SAAS;CACzB,GAAGA,OAAAA,EAAE,QAAQ;CACb,GAAGA,OAAAA,EAAE,QAAQ;CACb,UAAUA,OAAAA,EAAE,QAAQ;CACpB,UAAUA,OAAAA,EAAE,QAAQ;CACrB,CAAC;AAEF,MAAM,8BAA8BA,OAAAA,EAAE,OAAO;CAC3C,MAAMA,OAAAA,EAAE,QAAQ,OAAO;CACvB,MAAMA,OAAAA,EAAE,QAAQ;CACjB,CAAC;AAEF,MAAM,8BAA8BA,OAAAA,EAAE,OAAO;CAC3C,MAAMA,OAAAA,EAAE,QAAQ,OAAO;CACvB,UAAUA,OAAAA,EAAE,QAAQ,CAAC,UAAU;CAChC,CAAC;AAGF,MAAM,+BAA+BA,OAAAA,EAAE,MAAM;CAC3C;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD,CAAC;AAIF,MAAa,0BAA0BA,OAAAA,EAAE,OAAO,EAC9C,QAAQ,8BACT,CAAC;AAqFF,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkHlB,SAAgB,YAAY,SAA6B;CACvD,MAAM,gBAAA,GAAA,sBAAA,MACJ,OACE,OACA,YACG;EAQH,MAAM,uBAJY,QAAQ,OAAO,SAAS,GAAG,GAAG,GACZ,YAAY,MAC7C,OAAO,GAAG,SAAS,eACrB,GAC4C;AAC7C,MAAI,CAAC,mBACH,OAAM,IAAI,MAAM,iCAAiC;EAGnD,MAAM,SAAS,MAAM,QAAQ,QAAQ,MAAM,QAAQ,QAAQ;;;;AAK3D,MAAI,OAAO,WAAW,SACpB,QAAO,IAAIC,yBAAAA,YAAY;GACrB,SAAS;GACT,cAAc;GACd,mBAAmB,EACjB,MAAM,wBACP;GACF,CAAC;;;;AAMJ,SAAO,IAAIA,yBAAAA,YAAY;GACrB,GAAG;GACH,cAAc;GACd,mBAAmB;IACjB,MAAM;IACN,GAAG,OAAO;IACX;GACF,CAAC;IAEJ;EACE,MAAM;EACN,aACE;EACF,QAAQ;EACT,CACF;AAED,cAAa,SAAS;EACpB,GAAI,aAAa,UAAU,EAAE;EAC7B,wBAAwB;GACtB,MAAM;GACN,eAAe,QAAQ;GACvB,gBAAgB,QAAQ;GACxB,aAAa,QAAQ;GACtB;EACF;;;;;AAMD,QAAO"}