{"version":3,"sources":["../../../src/tools/web/webCrawler.ts"],"names":["WebCrawlerToolOutput","JSONToolOutput","getTextContent","result","url","statusCode","statusText","contentType","content","join","defaultParser","response","text","headers","get","includes","stripHtml","WebCrawlerTool","Tool","name","description","inputSchema","z","object","string","describe","client","parser","emitter","Emitter","root","child","namespace","creator","constructor","options","fetch","_run","_options","run","redirect","request","signal","status","createSnapshot","loadSnapshot","snapshot","Object","assign"],"mappings":";;;;;;;;;;AAcC,SAAA,YAAA,CAAA,UAAA,EAAA,MAAA,EAAA,GAAA,EAAA,IAAA,EAAA;;;;;;AAAA,MAAA,CAAA,YAAA,EAAA,cAAA,CAAA;;;;;AAwBM,MAAMA,6BAA6BC,uBAAAA,CAAAA;EAtC1C;;;EAwCEC,cAAyB,GAAA;AACvB,IAAO,OAAA;MACL,CAAQ,KAAA,EAAA,IAAA,CAAKC,OAAOC,GAAG,CAAA,CAAA;AACvB,MAAA,CAAA,QAAA,EAAW,KAAKD,MAAOE,CAAAA,UAAU,CAAK,EAAA,EAAA,IAAA,CAAKF,OAAOG,UAAU,CAAA,CAAA,CAAA;MAC5D,CAAiB,cAAA,EAAA,IAAA,CAAKH,OAAOI,WAAW,CAAA,CAAA;MACxC,CAAY,SAAA,EAAA,IAAA,CAAKJ,OAAOK,OAAO,CAAA;AAC/BC,KAAAA,CAAAA,IAAAA,CAAK,IAAA,CAAA;AACT;AACF;;;;;;;AAiBA,eAAeC,cAAcC,QAA4B,EAAA;AACvD,EAAMC,MAAAA,IAAAA,GAAO,MAAMD,QAAAA,CAASC,IAAI,EAAA;AAChC,EAAA,IAAIA,IAAM,EAAA;AACR,IAAA,MAAML,WAAcI,GAAAA,QAAAA,CAASE,OAAQC,CAAAA,GAAAA,CAAI,cAAA,CAAmB,IAAA,EAAA;AAC5D,IAAIP,IAAAA,WAAAA,CAAYQ,QAAS,CAAA,WAAA,CAAc,EAAA;AACrC,MAAOC,OAAAA,yBAAAA,CAAUJ,IAAAA,CAAMT,CAAAA,MAAAA;AACzB;AACF;AACA,EAAA,OAAOS,IAAQ,IAAA,YAAA;AACjB;AATeF,MAAAA,CAAAA,aAAAA,EAAAA,eAAAA,CAAAA;AAWR,MAAMO,uBAAuBC,aAAAA,CAAAA;EA5EpC;;;EA6EEC,IAAO,GAAA,YAAA;EACPC,WAAc,GAAA,CAAA,0CAAA,CAAA;EACdC,WAAc,GAAA;AACZ,IAAA,OAAOC,MAAEC,MAAO,CAAA;AACdnB,MAAAA,GAAAA,EAAKkB,MAAEE,MAAM,EAAA,CAAGpB,GAAG,EAAA,CAAGqB,SAAS,aAAA;KACjC,CAAA;AACF;AAEUC,EAAAA,MAAAA;AACAC,EAAAA,MAAAA;EAEMC,OAA8DC,GAAAA,mBAAAA,CAAQC,KAAKC,KAAM,CAAA;IAC/FC,SAAW,EAAA;AAAC,MAAA,MAAA;AAAQ,MAAA;;IACpBC,OAAS,EAAA;GACX,CAAA;AAEAC,EAAAA,WAAAA,CAAY,EAAER,MAAQC,EAAAA,MAAAA,EAAQ,GAAGQ,OAAAA,EAAAA,GAAuC,EAAI,EAAA;AAC1E,IAAA,KAAA,CAAMA,OAAAA,CAAAA;AACN,IAAA,IAAA,CAAKT,SAASA,MAAUU,IAAAA,KAAAA;AACxB,IAAA,IAAA,CAAKT,SAASA,MAAUjB,IAAAA,aAAAA;AAC1B;AAEA,EAAA,MAAgB2B,IACd,CAAA,EAAEjC,GAAG,EAAA,EACLkC,UACAC,GACA,EAAA;AACA,IAAA,MAAM5B,QAAW,GAAA,MAAM,IAAKe,CAAAA,MAAAA,CAAOtB,GAAK,EAAA;MACtCoC,QAAU,EAAA,QAAA;AACV,MAAA,GAAG,KAAKL,OAAQM,CAAAA,OAAAA;AAChBC,MAAAA,MAAAA,EAAQH,GAAIG,CAAAA;KACd,CAAA;AAEA,IAAA,MAAMlC,OAAU,GAAA,MAAM,IAAKmB,CAAAA,MAAAA,CAAOhB,QAAAA,CAAAA;AAClC,IAAA,OAAO,IAAIX,oBAAqB,CAAA;AAC9BI,MAAAA,GAAAA;AACAC,MAAAA,UAAAA,EAAYM,QAASgC,CAAAA,MAAAA;AACrBrC,MAAAA,UAAAA,EAAYK,QAASL,CAAAA,UAAAA;AACrBC,MAAAA,WAAAA,EAAaI,QAASE,CAAAA,OAAAA,CAAQC,GAAI,CAAA,cAAA,CAAmB,IAAA,SAAA;AACrDN,MAAAA;KACF,CAAA;AACF;EAEAoC,cAAiB,GAAA;AACf,IAAO,OAAA;AACL,MAAA,GAAG,MAAMA,cAAAA,EAAAA;AACTlB,MAAAA,MAAAA,EAAQ,IAAKA,CAAAA,MAAAA;AACbC,MAAAA,MAAAA,EAAQ,IAAKA,CAAAA;AACf,KAAA;AACF;AAEAkB,EAAAA,YAAAA,CAAa,EAAEnB,MAAAA,EAAQC,MAAQ,EAAA,GAAGmB,UAAoD,EAAA;AACpF,IAAA,KAAA,CAAMD,aAAaC,QAAAA,CAAAA;AACnBC,IAAAA,MAAAA,CAAOC,OAAO,IAAM,EAAA;AAClBtB,MAAAA,MAAAA,EAAQA,MAAUU,IAAAA,KAAAA;AAClBT,MAAAA,MAAAA,EAAQA,MAAUjB,IAAAA;KACpB,CAAA;AACF;AACF","file":"webCrawler.cjs","sourcesContent":["/**\n * Copyright 2025 IBM Corp.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport {\n  BaseToolOptions,\n  BaseToolRunOptions,\n  ToolEmitter,\n  JSONToolOutput,\n  Tool,\n  ToolInput,\n} from \"@/tools/base.js\";\nimport { z } from \"zod\";\nimport { Cache } from \"@/cache/decoratorCache.js\";\nimport { stripHtml } from \"string-strip-html\";\nimport { RunContext } from \"@/context.js\";\nimport { Emitter } from \"@/emitter/emitter.js\";\n\ninterface CrawlerOutput {\n  url: string;\n  statusCode: number;\n  statusText: string;\n  contentType: string;\n  content: string;\n}\n\nexport class WebCrawlerToolOutput extends JSONToolOutput<CrawlerOutput> {\n  @Cache()\n  getTextContent(): string {\n    return [\n      `URL: ${this.result.url}`,\n      `STATUS: ${this.result.statusCode} (${this.result.statusText})`,\n      `CONTENT-TYPE: ${this.result.contentType}`,\n      `CONTENT: ${this.result.content}`,\n    ].join(\"\\n\");\n  }\n}\n\nexport type HttpClient = (url: string, options?: RequestInit) => Promise<HttpClientResponse>;\ninterface HttpClientResponse {\n  status: number;\n  statusText: string;\n  headers: Headers;\n  text(): Promise<string>;\n}\n\ntype Parser = (response: HttpClientResponse) => Promise<string>;\ninterface WebsiteCrawlerToolOptions extends BaseToolOptions {\n  client?: HttpClient;\n  parser?: Parser;\n  request?: RequestInit;\n}\n\nasync function defaultParser(response: HttpClientResponse) {\n  const text = await response.text();\n  if (text) {\n    const contentType = response.headers.get(\"content-type\") ?? \"\";\n    if (contentType.includes(\"text/html\")) {\n      return stripHtml(text).result;\n    }\n  }\n  return text || \"No Content\";\n}\n\nexport class WebCrawlerTool extends Tool<WebCrawlerToolOutput, WebsiteCrawlerToolOptions> {\n  name = \"WebCrawler\";\n  description = `Retrieves content of an arbitrary website.`;\n  inputSchema() {\n    return z.object({\n      url: z.string().url().describe(\"Website URL\"),\n    });\n  }\n\n  protected client: HttpClient;\n  protected parser: Parser;\n\n  public readonly emitter: ToolEmitter<ToolInput<this>, WebCrawlerToolOutput> = Emitter.root.child({\n    namespace: [\"tool\", \"webCrawler\"],\n    creator: this,\n  });\n\n  constructor({ client, parser, ...options }: WebsiteCrawlerToolOptions = {}) {\n    super(options);\n    this.client = client ?? fetch;\n    this.parser = parser ?? defaultParser;\n  }\n\n  protected async _run(\n    { url }: ToolInput<this>,\n    _options: Partial<BaseToolRunOptions>,\n    run: RunContext<this>,\n  ) {\n    const response = await this.client(url, {\n      redirect: \"follow\",\n      ...this.options.request,\n      signal: run.signal,\n    });\n\n    const content = await this.parser(response);\n    return new WebCrawlerToolOutput({\n      url,\n      statusCode: response.status,\n      statusText: response.statusText,\n      contentType: response.headers.get(\"content-type\") ?? \"unknown\",\n      content,\n    });\n  }\n\n  createSnapshot() {\n    return {\n      ...super.createSnapshot(),\n      client: this.client,\n      parser: this.parser,\n    };\n  }\n\n  loadSnapshot({ client, parser, ...snapshot }: ReturnType<typeof this.createSnapshot>) {\n    super.loadSnapshot(snapshot);\n    Object.assign(this, {\n      client: client ?? fetch,\n      parser: parser ?? defaultParser,\n    });\n  }\n}\n"]}