{"version":3,"file":"ai-model/workflows/inspect/planning-action-locate.mjs","sources":["../../../../../src/ai-model/workflows/inspect/planning-action-locate.ts"],"sourcesContent":["import type { DeviceAction } from '@/device';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { z } from 'zod';\nimport { type TUserPrompt, getMidsceneLocationSchema } from '../../../common';\nimport { ScreenshotItem } from '../../../screenshot-item';\nimport { ConversationHistory } from '../../conversation-history';\nimport type { ResolvedCustomPlanningDefinition } from '../../model-adapter/custom-planning-types';\nimport type { PlanningTapLocatorDefinition } from '../../model-adapter/types';\nimport { AIResponseParseError } from '../../service-caller/index';\nimport { runCustomPlanning } from '../planning/custom-planning';\nimport type { PlanOptions } from '../planning/types';\nimport type {\n  LocateFn,\n  LocateModelResponse,\n  LocateOptions,\n  LocateRequestContext,\n} from './types';\n\nconst debugInspect = getDebug('ai:inspect');\n\nconst planningActionLocatorActionSpace: DeviceAction[] = [\n  {\n    name: 'Tap',\n    description: 'Tap the element',\n    paramSchema: z.object({\n      locate: getMidsceneLocationSchema(),\n    }),\n    call: async () => undefined,\n  },\n];\n\nasync function buildPlanningTapLocatorPlanOptions(\n  locateRequest: LocateRequestContext,\n): Promise<PlanOptions> {\n  const { options, locateImage } = locateRequest;\n  const { context } = options;\n\n  return {\n    ...options,\n    context: {\n      ...context,\n      screenshot: ScreenshotItem.create(\n        locateImage.imageBase64,\n        context.screenshot.capturedAt,\n      ),\n      shotSize: {\n        width: locateImage.width,\n        height: locateImage.height,\n      },\n    },\n    actionSpace: planningActionLocatorActionSpace,\n    conversationHistory: new ConversationHistory(),\n    includeLocateInPlanning: true,\n    referenceImageMessages: locateRequest.referenceImageMessages,\n  };\n}\n\nexport function resolvePlanningTapLocator<TParsed>(\n  definition: PlanningTapLocatorDefinition,\n  planner: ResolvedCustomPlanningDefinition<TParsed>,\n): LocateFn {\n  const locatorPlanner: ResolvedCustomPlanningDefinition<TParsed> = {\n    ...planner,\n    messages: {\n      ...planner.messages,\n      buildSystemPrompt: definition.buildSystemPrompt,\n      buildUserInstruction: (instruction) => `Tap: ${instruction}`,\n    },\n  };\n\n  return async (\n    elementDescription: TUserPrompt,\n    options: LocateOptions,\n    locateRequest: LocateRequestContext,\n  ): Promise<LocateModelResponse> => {\n    assert(elementDescription, 'cannot find the target element description');\n\n    let errors: string[] = [];\n    let reasoningContent = '';\n    let rawResponse = '';\n    let rawChoiceMessage: unknown;\n    let usage: LocateModelResponse['usage'];\n\n    try {\n      const locatePlanOptions =\n        await buildPlanningTapLocatorPlanOptions(locateRequest);\n      const planningResponse = await runCustomPlanning(\n        elementDescription,\n        locatePlanOptions,\n        locatorPlanner,\n      );\n\n      rawResponse = planningResponse.rawResponse ?? '';\n      rawChoiceMessage = planningResponse.rawChoiceMessage;\n      usage = planningResponse.usage;\n      reasoningContent = planningResponse.log;\n\n      debugInspect('planning-tap-locator rawResponse:', rawResponse);\n\n      const locatedPixelBbox = definition.getLocatedPixelBbox(\n        planningResponse.actions ?? [],\n      );\n\n      if (!locatedPixelBbox) {\n        throw new Error('No locatedPixelBbox found in planner response');\n      }\n\n      return {\n        locatedPixelBbox,\n        rawResponse,\n        rawChoiceMessage,\n        usage,\n        reasoningContent,\n      };\n    } catch (error) {\n      const errorMessage =\n        error instanceof Error ? error.message : String(error);\n      if (error instanceof AIResponseParseError) {\n        rawResponse = error.rawResponse;\n        rawChoiceMessage = error.rawChoiceMessage;\n        usage = error.usage;\n      }\n      errors = [\n        errorMessage || 'Failed to parse planning tap locator response',\n      ];\n      debugInspect('planning-tap-locator parse error:', errors[0]);\n    }\n\n    return {\n      rawResponse,\n      rawChoiceMessage,\n      usage,\n      reasoningContent,\n      errors,\n    };\n  };\n}\n"],"names":["debugInspect","getDebug","planningActionLocatorActionSpace","z","getMidsceneLocationSchema","undefined","buildPlanningTapLocatorPlanOptions","locateRequest","options","locateImage","context","ScreenshotItem","ConversationHistory","resolvePlanningTapLocator","definition","planner","locatorPlanner","instruction","elementDescription","assert","errors","reasoningContent","rawResponse","rawChoiceMessage","usage","locatePlanOptions","planningResponse","runCustomPlanning","locatedPixelBbox","Error","error","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;;AAmBA,MAAMA,eAAeC,SAAS;AAE9B,MAAMC,mCAAmD;IACvD;QACE,MAAM;QACN,aAAa;QACb,aAAaC,EAAE,MAAM,CAAC;YACpB,QAAQC;QACV;QACA,MAAM,UAAYC;IACpB;CACD;AAED,eAAeC,mCACbC,aAAmC;IAEnC,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAE,GAAGF;IACjC,MAAM,EAAEG,OAAO,EAAE,GAAGF;IAEpB,OAAO;QACL,GAAGA,OAAO;QACV,SAAS;YACP,GAAGE,OAAO;YACV,YAAYC,eAAe,MAAM,CAC/BF,YAAY,WAAW,EACvBC,QAAQ,UAAU,CAAC,UAAU;YAE/B,UAAU;gBACR,OAAOD,YAAY,KAAK;gBACxB,QAAQA,YAAY,MAAM;YAC5B;QACF;QACA,aAAaP;QACb,qBAAqB,IAAIU;QACzB,yBAAyB;QACzB,wBAAwBL,cAAc,sBAAsB;IAC9D;AACF;AAEO,SAASM,0BACdC,UAAwC,EACxCC,OAAkD;IAElD,MAAMC,iBAA4D;QAChE,GAAGD,OAAO;QACV,UAAU;YACR,GAAGA,QAAQ,QAAQ;YACnB,mBAAmBD,WAAW,iBAAiB;YAC/C,sBAAsB,CAACG,cAAgB,CAAC,KAAK,EAAEA,aAAa;QAC9D;IACF;IAEA,OAAO,OACLC,oBACAV,SACAD;QAEAY,OAAOD,oBAAoB;QAE3B,IAAIE,SAAmB,EAAE;QACzB,IAAIC,mBAAmB;QACvB,IAAIC,cAAc;QAClB,IAAIC;QACJ,IAAIC;QAEJ,IAAI;YACF,MAAMC,oBACJ,MAAMnB,mCAAmCC;YAC3C,MAAMmB,mBAAmB,MAAMC,kBAC7BT,oBACAO,mBACAT;YAGFM,cAAcI,iBAAiB,WAAW,IAAI;YAC9CH,mBAAmBG,iBAAiB,gBAAgB;YACpDF,QAAQE,iBAAiB,KAAK;YAC9BL,mBAAmBK,iBAAiB,GAAG;YAEvC1B,aAAa,qCAAqCsB;YAElD,MAAMM,mBAAmBd,WAAW,mBAAmB,CACrDY,iBAAiB,OAAO,IAAI,EAAE;YAGhC,IAAI,CAACE,kBACH,MAAM,IAAIC,MAAM;YAGlB,OAAO;gBACLD;gBACAN;gBACAC;gBACAC;gBACAH;YACF;QACF,EAAE,OAAOS,OAAO;YACd,MAAMC,eACJD,iBAAiBD,QAAQC,MAAM,OAAO,GAAGE,OAAOF;YAClD,IAAIA,iBAAiBG,sBAAsB;gBACzCX,cAAcQ,MAAM,WAAW;gBAC/BP,mBAAmBO,MAAM,gBAAgB;gBACzCN,QAAQM,MAAM,KAAK;YACrB;YACAV,SAAS;gBACPW,gBAAgB;aACjB;YACD/B,aAAa,qCAAqCoB,MAAM,CAAC,EAAE;QAC7D;QAEA,OAAO;YACLE;YACAC;YACAC;YACAH;YACAD;QACF;IACF;AACF"}