{"version":3,"file":"trajectory.cjs","names":["BaseLLMOutputParser","AgentTrajectoryEvaluator","EVAL_CHAT_PROMPT","TOOL_FREE_EVAL_CHAT_PROMPT","RUN_KEY"],"sources":["../../../src/evaluation/agents/trajectory.ts"],"sourcesContent":["import type { StructuredToolInterface } from \"@langchain/core/tools\";\nimport { BaseLLMOutputParser } from \"@langchain/core/output_parsers\";\n\nimport { AgentStep } from \"@langchain/core/agents\";\nimport { ChainValues } from \"@langchain/core/utils/types\";\nimport { ChatGeneration, Generation, RUN_KEY } from \"@langchain/core/outputs\";\nimport { BasePromptTemplate } from \"@langchain/core/prompts\";\nimport {\n  Callbacks,\n  BaseCallbackConfig,\n} from \"@langchain/core/callbacks/manager\";\nimport { BaseChatModel } from \"@langchain/core/language_models/chat_models\";\nimport {\n  AgentTrajectoryEvaluator,\n  EvalOutputType,\n  LLMEvalChainInput,\n  LLMTrajectoryEvaluatorArgs,\n  type ExtractLLMCallOptions,\n} from \"../base.js\";\nimport { EVAL_CHAT_PROMPT, TOOL_FREE_EVAL_CHAT_PROMPT } from \"./prompt.js\";\n\n/**\n * A parser for the output of the TrajectoryEvalChain.\n */\nexport class TrajectoryOutputParser extends BaseLLMOutputParser<EvalOutputType> {\n  static lc_name(): string {\n    return \"TrajectoryOutputParser\";\n  }\n\n  lc_namespace = [\"langchain\", \"evaluation\", \"agents\"];\n\n  parseResult(\n    generations: Generation[] | ChatGeneration[],\n    _callbacks: Callbacks | undefined\n  ): Promise<EvalOutputType> {\n    const { text } = generations[0];\n\n    if (!text.includes(\"Score:\")) {\n      throw new Error(`Could not find score in model eval output: ${text}`);\n    }\n\n    let [reasoning, scoreStr] = text.split(\"Score:\", 2);\n    reasoning = reasoning.trim();\n    scoreStr = scoreStr.trim();\n\n    // Use regex to extract the score.\n    // This will get the number in the string, even if it is a float or more than 10.\n    // E.g. \"Score: 1\" will return 1, \"Score: 3.5\" will return 3.5, and\n    // \"Score: 10\" will return 10.\n    // The score should be an integer digit in the range 1-5.\n\n    const scoreMatch = scoreStr.match(/(\\d+(\\.\\d+)?)/);\n    if (scoreMatch === null || scoreMatch[1].includes(\".\")) {\n      throw new Error(\n        `Score is not an integer digit in the range 1-5: ${text}`\n      );\n    }\n\n    const score = +scoreMatch[1];\n    if (score < 1 || score > 5) {\n      throw new Error(`Score is not a digit in the range 1-5: ${text}`);\n    }\n\n    const normalizedScore = (score - 1) / 4;\n\n    return Promise.resolve({\n      reasoning,\n      score: normalizedScore,\n    });\n  }\n}\n\n/**\n * A chain for evaluating ReAct style agents.\n *\n * This chain is used to evaluate ReAct style agents by reasoning about\n * the sequence of actions taken and their outcomes.\n */\nexport class TrajectoryEvalChain extends AgentTrajectoryEvaluator {\n  static lc_name(): string {\n    return \"TrajectoryEvalChain\";\n  }\n\n  criterionName?: string;\n\n  evaluationName?: string = this.criterionName;\n\n  requiresInput = true;\n\n  requiresReference = false;\n\n  outputParser = new TrajectoryOutputParser();\n\n  static resolveTrajectoryPrompt(\n    prompt?: BasePromptTemplate | undefined,\n    agentTools?: StructuredToolInterface[]\n  ) {\n    let _prompt;\n    if (prompt) {\n      _prompt = prompt;\n    } else if (agentTools) {\n      _prompt = EVAL_CHAT_PROMPT;\n    } else {\n      _prompt = TOOL_FREE_EVAL_CHAT_PROMPT;\n    }\n\n    return _prompt;\n  }\n\n  /**\n   * Get the description of the agent tools.\n   *\n   * @returns The description of the agent tools.\n   */\n  static toolsDescription(agentTools: StructuredToolInterface[]): string {\n    return agentTools\n      .map(\n        (tool, i) =>\n          `Tool ${i + 1}: ${tool.name}\\n Description: ${tool.description}`\n      )\n      .join(\"\\n\\n\");\n  }\n\n  /**\n   * Create a new TrajectoryEvalChain.\n   * @param llm\n   * @param agentTools - The tools used by the agent.\n   * @param chainOptions - The options for the chain.\n   */\n  static async fromLLM(\n    llm: BaseChatModel,\n    agentTools?: StructuredToolInterface[],\n    chainOptions?: Partial<Omit<LLMEvalChainInput, \"llm\">>\n  ) {\n    let prompt = this.resolveTrajectoryPrompt(chainOptions?.prompt, agentTools);\n    if (agentTools) {\n      const toolDescriptions = this.toolsDescription(agentTools);\n      prompt = await prompt.partial({ toolDescriptions });\n    }\n\n    const options = chainOptions;\n    if (options) {\n      // remove prompt from chainOptions\n      delete options.prompt;\n    }\n\n    return new this({\n      llm,\n      prompt,\n      ...options,\n    });\n  }\n\n  _prepareOutput(result: ChainValues) {\n    const parsed = result[this.outputKey];\n    if (RUN_KEY in result && result[RUN_KEY]) {\n      parsed[RUN_KEY] = result[RUN_KEY];\n    }\n    return parsed;\n  }\n\n  /**\n   * Get the agent trajectory as a formatted string.\n   *\n   * @param steps - The agent trajectory.\n   * @returns The formatted agent trajectory.\n   */\n  getAgentTrajectory(steps: AgentStep[]): string {\n    return steps\n      .map((step, i) => {\n        const { action, observation } = step;\n\n        return (\n          `Step ${i + 1}:\\n` +\n          `Tool used: ${action.tool}\\n` +\n          `Tool input: ${action.toolInput}\\n` +\n          `Tool output: ${observation}`\n        );\n      })\n      .join(\"\\n\\n\");\n  }\n\n  formatReference(reference?: string): string {\n    if (!reference) {\n      return \"\";\n    }\n    return `\nThe following is the expected answer. Use this to measure correctness:\n[GROUND_TRUTH]\n${reference}\n[END_GROUND_TRUTH]\n        `;\n  }\n\n  async _evaluateAgentTrajectory(\n    args: LLMTrajectoryEvaluatorArgs,\n    callOptions: ExtractLLMCallOptions<this[\"llm\"]>,\n    config?: Callbacks | BaseCallbackConfig\n  ): Promise<ChainValues> {\n    const { input, prediction, reference, agentTrajectory } = args;\n\n    const inputs = {\n      question: input,\n      agentTrajectory: this.getAgentTrajectory(agentTrajectory),\n      answer: prediction,\n      reference: this.formatReference(reference),\n    };\n\n    const result = await this.call({ ...inputs, ...callOptions }, config);\n\n    return this._prepareOutput(result);\n  }\n}\n"],"mappings":";;;;;;;;;AAwBA,IAAa,yBAAb,cAA4CA,+BAAAA,oBAAoC;CAC9E,OAAO,UAAkB;AACvB,SAAO;;CAGT,eAAe;EAAC;EAAa;EAAc;EAAS;CAEpD,YACE,aACA,YACyB;EACzB,MAAM,EAAE,SAAS,YAAY;AAE7B,MAAI,CAAC,KAAK,SAAS,SAAS,CAC1B,OAAM,IAAI,MAAM,8CAA8C,OAAO;EAGvE,IAAI,CAAC,WAAW,YAAY,KAAK,MAAM,UAAU,EAAE;AACnD,cAAY,UAAU,MAAM;AAC5B,aAAW,SAAS,MAAM;EAQ1B,MAAM,aAAa,SAAS,MAAM,gBAAgB;AAClD,MAAI,eAAe,QAAQ,WAAW,GAAG,SAAS,IAAI,CACpD,OAAM,IAAI,MACR,mDAAmD,OACpD;EAGH,MAAM,QAAQ,CAAC,WAAW;AAC1B,MAAI,QAAQ,KAAK,QAAQ,EACvB,OAAM,IAAI,MAAM,0CAA0C,OAAO;EAGnE,MAAM,mBAAmB,QAAQ,KAAK;AAEtC,SAAO,QAAQ,QAAQ;GACrB;GACA,OAAO;GACR,CAAC;;;;;;;;;AAUN,IAAa,sBAAb,cAAyCC,aAAAA,yBAAyB;CAChE,OAAO,UAAkB;AACvB,SAAO;;CAGT;CAEA,iBAA0B,KAAK;CAE/B,gBAAgB;CAEhB,oBAAoB;CAEpB,eAAe,IAAI,wBAAwB;CAE3C,OAAO,wBACL,QACA,YACA;EACA,IAAI;AACJ,MAAI,OACF,WAAU;WACD,WACT,WAAUC,eAAAA;MAEV,WAAUC,eAAAA;AAGZ,SAAO;;;;;;;CAQT,OAAO,iBAAiB,YAA+C;AACrE,SAAO,WACJ,KACE,MAAM,MACL,QAAQ,IAAI,EAAE,IAAI,KAAK,KAAK,kBAAkB,KAAK,cACtD,CACA,KAAK,OAAO;;;;;;;;CASjB,aAAa,QACX,KACA,YACA,cACA;EACA,IAAI,SAAS,KAAK,wBAAwB,cAAc,QAAQ,WAAW;AAC3E,MAAI,YAAY;GACd,MAAM,mBAAmB,KAAK,iBAAiB,WAAW;AAC1D,YAAS,MAAM,OAAO,QAAQ,EAAE,kBAAkB,CAAC;;EAGrD,MAAM,UAAU;AAChB,MAAI,QAEF,QAAO,QAAQ;AAGjB,SAAO,IAAI,KAAK;GACd;GACA;GACA,GAAG;GACJ,CAAC;;CAGJ,eAAe,QAAqB;EAClC,MAAM,SAAS,OAAO,KAAK;AAC3B,MAAIC,wBAAAA,WAAW,UAAU,OAAOA,wBAAAA,SAC9B,QAAOA,wBAAAA,WAAW,OAAOA,wBAAAA;AAE3B,SAAO;;;;;;;;CAST,mBAAmB,OAA4B;AAC7C,SAAO,MACJ,KAAK,MAAM,MAAM;GAChB,MAAM,EAAE,QAAQ,gBAAgB;AAEhC,UACE,QAAQ,IAAI,EAAE,gBACA,OAAO,KAAK,gBACX,OAAO,UAAU,iBAChB;IAElB,CACD,KAAK,OAAO;;CAGjB,gBAAgB,WAA4B;AAC1C,MAAI,CAAC,UACH,QAAO;AAET,SAAO;;;EAGT,UAAU;;;;CAKV,MAAM,yBACJ,MACA,aACA,QACsB;EACtB,MAAM,EAAE,OAAO,YAAY,WAAW,oBAAoB;EAE1D,MAAM,SAAS;GACb,UAAU;GACV,iBAAiB,KAAK,mBAAmB,gBAAgB;GACzD,QAAQ;GACR,WAAW,KAAK,gBAAgB,UAAU;GAC3C;EAED,MAAM,SAAS,MAAM,KAAK,KAAK;GAAE,GAAG;GAAQ,GAAG;GAAa,EAAE,OAAO;AAErE,SAAO,KAAK,eAAe,OAAO"}