{"version":3,"file":"element-describer.mjs","sources":["../../src/element-describer.ts"],"sourcesContent":["import type { ModelRuntime } from '@/ai-model/models';\nimport { assert } from '@midscene/shared/utils';\nimport { createScreenshotBoundUIContext } from './agent/utils';\nimport type Service from './service';\nimport type {\n  AgentDescribeElementAtPointResult,\n  LocateOption,\n  LocateResultElement,\n  LocateValidatorResult,\n  LocatorValidatorOption,\n  Rect,\n  Size,\n  UIContext,\n} from './types';\nimport { buildDetailedLocateParam } from './yaml/utils';\n\nexport type DescribeElementCoordinateSpace = 'screenshot' | 'logical';\n\nexport type LocatorVerifyFn = (input: {\n  prompt: string;\n  expectCenter: [number, number];\n  deepLocate: boolean;\n  retryCount: number;\n  verifyResult: LocateValidatorResult;\n}) => LocateValidatorResult | boolean;\n\nexport type DescribeElementAtPointOptions = {\n  verifyPrompt?: boolean;\n  retryLimit?: number;\n  deepLocate?: boolean;\n  locatorVerifyFn?: LocatorVerifyFn;\n  screenshotBase64?: string;\n  screenshotSize?: Size;\n  coordinateSpace?: DescribeElementCoordinateSpace;\n  logicalSize?: Size;\n  onProgress?: (progress: {\n    prompt?: string;\n    deepLocate?: boolean;\n    verifyResult?: LocateValidatorResult;\n  }) => void;\n} & LocatorValidatorOption;\n\ntype ScreenshotBoundContextOptions = {\n  screenshotBase64?: string;\n  screenshotSize?: Size;\n  coordinateSpace?: DescribeElementCoordinateSpace;\n  logicalSize?: Size;\n};\n\nexport type VerifyElementDescriptionAtPointOptions =\n  ScreenshotBoundContextOptions & LocatorValidatorOption;\n\nexport type VerifyElementByServiceLocateOptions =\n  VerifyElementDescriptionAtPointOptions &\n    Pick<LocateOption, 'cacheable' | 'deepLocate' | 'xpath'> & {\n      abortSignal?: AbortSignal;\n    };\n\nexport type ElementDescriberRuntime = {\n  service: Pick<Service, 'describe' | 'locate'>;\n  describeModelRuntime: ModelRuntime;\n  locateModelRuntime: ModelRuntime;\n};\n\nconst distanceOfTwoPoints = (p1: [number, number], p2: [number, number]) => {\n  const [x1, y1] = p1;\n  const [x2, y2] = p2;\n  return Math.round(Math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2));\n};\n\nconst includedInRect = (point: [number, number], rect: Rect) => {\n  const [x, y] = point;\n  const { left, top, width, height } = rect;\n  return x >= left && x <= left + width && y >= top && y <= top + height;\n};\n\nconst buildLocateValidatorResult = (\n  expectCenter: [number, number],\n  located: Pick<LocateResultElement, 'center' | 'rect'>,\n  verifyLocateOption?: LocatorValidatorOption,\n): LocateValidatorResult => {\n  const distance = distanceOfTwoPoints(expectCenter, located.center);\n  const included = includedInRect(expectCenter, located.rect);\n  const pass =\n    distance <= (verifyLocateOption?.centerDistanceThreshold || 20) || included;\n  return {\n    pass,\n    rect: located.rect,\n    center: located.center,\n    centerDistance: distance,\n    includedInRect: included,\n  };\n};\n\nfunction assertPositiveSize(\n  size: Size | undefined,\n  label: string,\n): asserts size is Size {\n  assert(\n    size &&\n      Number.isFinite(size.width) &&\n      Number.isFinite(size.height) &&\n      size.width > 0 &&\n      size.height > 0,\n    `${label} must include positive width and height`,\n  );\n}\n\nconst mapPointToScreenshotSpace = (\n  center: [number, number],\n  screenshotSize: Size,\n  opt: ScreenshotBoundContextOptions,\n): [number, number] => {\n  const coordinateSpace = opt.coordinateSpace || 'screenshot';\n  if (coordinateSpace === 'screenshot') {\n    return center;\n  }\n\n  assertPositiveSize(\n    opt.logicalSize,\n    'logicalSize is required when coordinateSpace is logical',\n  );\n  return [\n    (center[0] * screenshotSize.width) / opt.logicalSize.width,\n    (center[1] * screenshotSize.height) / opt.logicalSize.height,\n  ];\n};\n\nconst createScreenshotBoundLocatorContext = async (\n  center: [number, number],\n  opt?: ScreenshotBoundContextOptions,\n): Promise<{\n  screenshotContext?: UIContext;\n  locateOpt?: LocateOption;\n  targetCenter: [number, number];\n}> => {\n  const screenshotContext = opt?.screenshotBase64\n    ? await createScreenshotBoundUIContext(opt.screenshotBase64, opt)\n    : undefined;\n  const targetCenter = screenshotContext\n    ? mapPointToScreenshotSpace(center, screenshotContext.shotSize, opt || {})\n    : center;\n  return {\n    screenshotContext,\n    locateOpt: screenshotContext ? { uiContext: screenshotContext } : undefined,\n    targetCenter,\n  };\n};\n\nexport async function verifyLocator(\n  runtime: Pick<ElementDescriberRuntime, 'service' | 'locateModelRuntime'>,\n  prompt: string,\n  locateOpt: LocateOption | undefined,\n  expectCenter: [number, number],\n  verifyLocateOption?: LocatorValidatorOption,\n): Promise<LocateValidatorResult> {\n  return locateAndVerify(runtime, prompt, expectCenter, {\n    centerDistanceThreshold: verifyLocateOption?.centerDistanceThreshold,\n    uiContext: locateOpt?.uiContext,\n  });\n}\n\nfunction applyLocatorVerifyFn(\n  locatorVerifyFn: LocatorVerifyFn | undefined,\n  input: Parameters<LocatorVerifyFn>[0],\n): LocateValidatorResult {\n  if (!locatorVerifyFn) {\n    return input.verifyResult;\n  }\n  const customResult = locatorVerifyFn(input);\n  if (typeof customResult === 'boolean') {\n    return {\n      ...input.verifyResult,\n      pass: customResult,\n    };\n  }\n  return customResult;\n}\n\nfunction errorMessage(error: unknown): string {\n  return error instanceof Error ? error.message : String(error);\n}\n\nexport async function describeElementAtPoint(\n  runtime: ElementDescriberRuntime,\n  center: [number, number],\n  opt?: DescribeElementAtPointOptions,\n): Promise<AgentDescribeElementAtPointResult> {\n  const { verifyPrompt = true, retryLimit = 3 } = opt || {};\n  const { screenshotContext, locateOpt, targetCenter } =\n    await createScreenshotBoundLocatorContext(center, opt);\n\n  let success = false;\n  let retryCount = 0;\n  let resultPrompt = '';\n  let deepLocate = opt?.deepLocate || false;\n  let verifyResult: LocateValidatorResult | undefined;\n  let lastError: string | undefined;\n  let failureStage: AgentDescribeElementAtPointResult['failureStage'];\n\n  while (!success && retryCount < retryLimit) {\n    if (retryCount >= 2) {\n      deepLocate = true;\n    }\n    const describeOpt = screenshotContext\n      ? { deepLocate, context: screenshotContext }\n      : { deepLocate };\n\n    let text: Awaited<\n      ReturnType<ElementDescriberRuntime['service']['describe']>\n    >;\n    try {\n      text = await runtime.service.describe(\n        targetCenter,\n        runtime.describeModelRuntime,\n        describeOpt,\n      );\n    } catch (error) {\n      return {\n        prompt: resultPrompt,\n        deepLocate,\n        verifyResult,\n        success: false,\n        error: errorMessage(error),\n        failureStage: 'describe',\n      };\n    }\n    if (!text.description) {\n      return {\n        prompt: resultPrompt,\n        deepLocate,\n        verifyResult,\n        success: false,\n        error: `failed to describe element at [${targetCenter}]`,\n        failureStage: 'describe',\n      };\n    }\n    resultPrompt = text.description;\n\n    if (!verifyPrompt) {\n      opt?.onProgress?.({ prompt: resultPrompt, deepLocate });\n      success = true;\n      break;\n    }\n\n    try {\n      verifyResult = await verifyLocator(\n        runtime,\n        resultPrompt,\n        locateOpt,\n        targetCenter,\n        opt,\n      );\n      verifyResult = applyLocatorVerifyFn(opt?.locatorVerifyFn, {\n        prompt: resultPrompt,\n        expectCenter: targetCenter,\n        deepLocate,\n        retryCount,\n        verifyResult,\n      });\n      opt?.onProgress?.({ prompt: resultPrompt, deepLocate, verifyResult });\n      if (verifyResult.pass) {\n        success = true;\n      } else {\n        lastError = undefined;\n        failureStage = 'verify';\n        retryCount++;\n      }\n    } catch (error) {\n      lastError = errorMessage(error);\n      failureStage = 'verify';\n      opt?.onProgress?.({ prompt: resultPrompt, deepLocate });\n      retryCount++;\n    }\n  }\n\n  return {\n    prompt: resultPrompt,\n    deepLocate,\n    verifyResult,\n    success,\n    error:\n      success || !verifyPrompt\n        ? undefined\n        : lastError || 'describeElementAtPoint verify failed',\n    failureStage: success ? undefined : failureStage,\n  };\n}\n\nexport async function verifyElementDescriptionAtPoint(\n  runtime: ElementDescriberRuntime,\n  description: string,\n  center: [number, number],\n  opt?: VerifyElementDescriptionAtPointOptions,\n): Promise<LocateValidatorResult> {\n  assert(description?.trim(), 'description must not be empty');\n  const { locateOpt, targetCenter } = await createScreenshotBoundLocatorContext(\n    center,\n    opt,\n  );\n\n  return verifyLocator(runtime, description, locateOpt, targetCenter, opt);\n}\n\nexport async function verifyElementByServiceLocate(\n  runtime: Pick<ElementDescriberRuntime, 'service' | 'locateModelRuntime'>,\n  description: string,\n  center: [number, number],\n  opt?: VerifyElementByServiceLocateOptions,\n): Promise<LocateValidatorResult> {\n  return locateAndVerify(runtime, description, center, opt);\n}\n\nasync function locateAndVerify(\n  runtime: Pick<ElementDescriberRuntime, 'service' | 'locateModelRuntime'>,\n  description: string,\n  center: [number, number],\n  opt?: VerifyElementByServiceLocateOptions & { uiContext?: UIContext },\n): Promise<LocateValidatorResult> {\n  assert(description?.trim(), 'description must not be empty');\n  const { screenshotContext, targetCenter } =\n    await createScreenshotBoundLocatorContext(center, opt);\n  const context = opt?.uiContext || screenshotContext;\n  const locateParam = buildDetailedLocateParam(description, {\n    cacheable: opt?.cacheable,\n    deepLocate: opt?.deepLocate,\n    xpath: opt?.xpath,\n  });\n  assert(locateParam, 'cannot get locate param for service locate');\n\n  const locateResult = await runtime.service.locate(\n    locateParam,\n    context ? { context } : {},\n    runtime.locateModelRuntime,\n    opt?.abortSignal,\n  );\n  assert(locateResult.element, `Element not found: ${description}`);\n  return buildLocateValidatorResult(targetCenter, locateResult.element, opt);\n}\n"],"names":["distanceOfTwoPoints","p1","p2","x1","y1","x2","y2","Math","includedInRect","point","rect","x","y","left","top","width","height","buildLocateValidatorResult","expectCenter","located","verifyLocateOption","distance","included","pass","assertPositiveSize","size","label","assert","Number","mapPointToScreenshotSpace","center","screenshotSize","opt","coordinateSpace","createScreenshotBoundLocatorContext","screenshotContext","createScreenshotBoundUIContext","undefined","targetCenter","verifyLocator","runtime","prompt","locateOpt","locateAndVerify","applyLocatorVerifyFn","locatorVerifyFn","input","customResult","errorMessage","error","Error","String","describeElementAtPoint","verifyPrompt","retryLimit","success","retryCount","resultPrompt","deepLocate","verifyResult","lastError","failureStage","describeOpt","text","verifyElementDescriptionAtPoint","description","verifyElementByServiceLocate","context","locateParam","buildDetailedLocateParam","locateResult"],"mappings":";;;AAgEA,MAAMA,sBAAsB,CAACC,IAAsBC;IACjD,MAAM,CAACC,IAAIC,GAAG,GAAGH;IACjB,MAAM,CAACI,IAAIC,GAAG,GAAGJ;IACjB,OAAOK,KAAK,KAAK,CAACA,KAAK,IAAI,CAAEJ,AAAAA,CAAAA,KAAKE,EAAC,KAAM,IAAKD,AAAAA,CAAAA,KAAKE,EAAC,KAAM;AAC5D;AAEA,MAAME,iBAAiB,CAACC,OAAyBC;IAC/C,MAAM,CAACC,GAAGC,EAAE,GAAGH;IACf,MAAM,EAAEI,IAAI,EAAEC,GAAG,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGN;IACrC,OAAOC,KAAKE,QAAQF,KAAKE,OAAOE,SAASH,KAAKE,OAAOF,KAAKE,MAAME;AAClE;AAEA,MAAMC,6BAA6B,CACjCC,cACAC,SACAC;IAEA,MAAMC,WAAWrB,oBAAoBkB,cAAcC,QAAQ,MAAM;IACjE,MAAMG,WAAWd,eAAeU,cAAcC,QAAQ,IAAI;IAC1D,MAAMI,OACJF,YAAaD,CAAAA,oBAAoB,2BAA2B,EAAC,KAAME;IACrE,OAAO;QACLC;QACA,MAAMJ,QAAQ,IAAI;QAClB,QAAQA,QAAQ,MAAM;QACtB,gBAAgBE;QAChB,gBAAgBC;IAClB;AACF;AAEA,SAASE,mBACPC,IAAsB,EACtBC,KAAa;IAEbC,OACEF,QACEG,OAAO,QAAQ,CAACH,KAAK,KAAK,KAC1BG,OAAO,QAAQ,CAACH,KAAK,MAAM,KAC3BA,KAAK,KAAK,GAAG,KACbA,KAAK,MAAM,GAAG,GAChB,GAAGC,MAAM,uCAAuC,CAAC;AAErD;AAEA,MAAMG,4BAA4B,CAChCC,QACAC,gBACAC;IAEA,MAAMC,kBAAkBD,IAAI,eAAe,IAAI;IAC/C,IAAIC,AAAoB,iBAApBA,iBACF,OAAOH;IAGTN,mBACEQ,IAAI,WAAW,EACf;IAEF,OAAO;QACJF,MAAM,CAAC,EAAE,GAAGC,eAAe,KAAK,GAAIC,IAAI,WAAW,CAAC,KAAK;QACzDF,MAAM,CAAC,EAAE,GAAGC,eAAe,MAAM,GAAIC,IAAI,WAAW,CAAC,MAAM;KAC7D;AACH;AAEA,MAAME,sCAAsC,OAC1CJ,QACAE;IAMA,MAAMG,oBAAoBH,KAAK,mBAC3B,MAAMI,+BAA+BJ,IAAI,gBAAgB,EAAEA,OAC3DK;IACJ,MAAMC,eAAeH,oBACjBN,0BAA0BC,QAAQK,kBAAkB,QAAQ,EAAEH,OAAO,CAAC,KACtEF;IACJ,OAAO;QACLK;QACA,WAAWA,oBAAoB;YAAE,WAAWA;QAAkB,IAAIE;QAClEC;IACF;AACF;AAEO,eAAeC,cACpBC,OAAwE,EACxEC,MAAc,EACdC,SAAmC,EACnCxB,YAA8B,EAC9BE,kBAA2C;IAE3C,OAAOuB,gBAAgBH,SAASC,QAAQvB,cAAc;QACpD,yBAAyBE,oBAAoB;QAC7C,WAAWsB,WAAW;IACxB;AACF;AAEA,SAASE,qBACPC,eAA4C,EAC5CC,KAAqC;IAErC,IAAI,CAACD,iBACH,OAAOC,MAAM,YAAY;IAE3B,MAAMC,eAAeF,gBAAgBC;IACrC,IAAI,AAAwB,aAAxB,OAAOC,cACT,OAAO;QACL,GAAGD,MAAM,YAAY;QACrB,MAAMC;IACR;IAEF,OAAOA;AACT;AAEA,SAASC,aAAaC,KAAc;IAClC,OAAOA,iBAAiBC,QAAQD,MAAM,OAAO,GAAGE,OAAOF;AACzD;AAEO,eAAeG,uBACpBZ,OAAgC,EAChCV,MAAwB,EACxBE,GAAmC;IAEnC,MAAM,EAAEqB,eAAe,IAAI,EAAEC,aAAa,CAAC,EAAE,GAAGtB,OAAO,CAAC;IACxD,MAAM,EAAEG,iBAAiB,EAAEO,SAAS,EAAEJ,YAAY,EAAE,GAClD,MAAMJ,oCAAoCJ,QAAQE;IAEpD,IAAIuB,UAAU;IACd,IAAIC,aAAa;IACjB,IAAIC,eAAe;IACnB,IAAIC,aAAa1B,KAAK,cAAc;IACpC,IAAI2B;IACJ,IAAIC;IACJ,IAAIC;IAEJ,MAAO,CAACN,WAAWC,aAAaF,WAAY;QAC1C,IAAIE,cAAc,GAChBE,aAAa;QAEf,MAAMI,cAAc3B,oBAChB;YAAEuB;YAAY,SAASvB;QAAkB,IACzC;YAAEuB;QAAW;QAEjB,IAAIK;QAGJ,IAAI;YACFA,OAAO,MAAMvB,QAAQ,OAAO,CAAC,QAAQ,CACnCF,cACAE,QAAQ,oBAAoB,EAC5BsB;QAEJ,EAAE,OAAOb,OAAO;YACd,OAAO;gBACL,QAAQQ;gBACRC;gBACAC;gBACA,SAAS;gBACT,OAAOX,aAAaC;gBACpB,cAAc;YAChB;QACF;QACA,IAAI,CAACc,KAAK,WAAW,EACnB,OAAO;YACL,QAAQN;YACRC;YACAC;YACA,SAAS;YACT,OAAO,CAAC,+BAA+B,EAAErB,aAAa,CAAC,CAAC;YACxD,cAAc;QAChB;QAEFmB,eAAeM,KAAK,WAAW;QAE/B,IAAI,CAACV,cAAc;YACjBrB,KAAK,aAAa;gBAAE,QAAQyB;gBAAcC;YAAW;YACrDH,UAAU;YACV;QACF;QAEA,IAAI;YACFI,eAAe,MAAMpB,cACnBC,SACAiB,cACAf,WACAJ,cACAN;YAEF2B,eAAef,qBAAqBZ,KAAK,iBAAiB;gBACxD,QAAQyB;gBACR,cAAcnB;gBACdoB;gBACAF;gBACAG;YACF;YACA3B,KAAK,aAAa;gBAAE,QAAQyB;gBAAcC;gBAAYC;YAAa;YACnE,IAAIA,aAAa,IAAI,EACnBJ,UAAU;iBACL;gBACLK,YAAYvB;gBACZwB,eAAe;gBACfL;YACF;QACF,EAAE,OAAOP,OAAO;YACdW,YAAYZ,aAAaC;YACzBY,eAAe;YACf7B,KAAK,aAAa;gBAAE,QAAQyB;gBAAcC;YAAW;YACrDF;QACF;IACF;IAEA,OAAO;QACL,QAAQC;QACRC;QACAC;QACAJ;QACA,OACEA,WAAW,CAACF,eACRhB,SACAuB,aAAa;QACnB,cAAcL,UAAUlB,SAAYwB;IACtC;AACF;AAEO,eAAeG,gCACpBxB,OAAgC,EAChCyB,WAAmB,EACnBnC,MAAwB,EACxBE,GAA4C;IAE5CL,OAAOsC,aAAa,QAAQ;IAC5B,MAAM,EAAEvB,SAAS,EAAEJ,YAAY,EAAE,GAAG,MAAMJ,oCACxCJ,QACAE;IAGF,OAAOO,cAAcC,SAASyB,aAAavB,WAAWJ,cAAcN;AACtE;AAEO,eAAekC,6BACpB1B,OAAwE,EACxEyB,WAAmB,EACnBnC,MAAwB,EACxBE,GAAyC;IAEzC,OAAOW,gBAAgBH,SAASyB,aAAanC,QAAQE;AACvD;AAEA,eAAeW,gBACbH,OAAwE,EACxEyB,WAAmB,EACnBnC,MAAwB,EACxBE,GAAqE;IAErEL,OAAOsC,aAAa,QAAQ;IAC5B,MAAM,EAAE9B,iBAAiB,EAAEG,YAAY,EAAE,GACvC,MAAMJ,oCAAoCJ,QAAQE;IACpD,MAAMmC,UAAUnC,KAAK,aAAaG;IAClC,MAAMiC,cAAcC,yBAAyBJ,aAAa;QACxD,WAAWjC,KAAK;QAChB,YAAYA,KAAK;QACjB,OAAOA,KAAK;IACd;IACAL,OAAOyC,aAAa;IAEpB,MAAME,eAAe,MAAM9B,QAAQ,OAAO,CAAC,MAAM,CAC/C4B,aACAD,UAAU;QAAEA;IAAQ,IAAI,CAAC,GACzB3B,QAAQ,kBAAkB,EAC1BR,KAAK;IAEPL,OAAO2C,aAAa,OAAO,EAAE,CAAC,mBAAmB,EAAEL,aAAa;IAChE,OAAOhD,2BAA2BqB,cAAcgC,aAAa,OAAO,EAAEtC;AACxE"}