{"version":3,"file":"pairwise.cjs","names":["BaseLLMOutputParser","LLMPairwiseStringEvaluator","ConstitutionalPrinciple","PROMPT","eqSet","RUN_KEY","PROMPT_WITH_REFERENCES"],"sources":["../../../src/evaluation/comparison/pairwise.ts"],"sourcesContent":["import type { BaseLanguageModelInterface } from \"@langchain/core/language_models/base\";\nimport { BaseLLMOutputParser } from \"@langchain/core/output_parsers\";\nimport { ChainValues } from \"@langchain/core/utils/types\";\nimport { ChatGeneration, Generation, RUN_KEY } from \"@langchain/core/outputs\";\nimport { BasePromptTemplate } from \"@langchain/core/prompts\";\nimport {\n  Callbacks,\n  BaseCallbackConfig,\n} from \"@langchain/core/callbacks/manager\";\nimport {\n  eqSet,\n  EvalOutputType,\n  LLMEvalChainInput,\n  LLMPairwiseStringEvaluator,\n  LLMPairwiseStringEvaluatorArgs,\n  type ExtractLLMCallOptions,\n} from \"../base.js\";\n\nimport { PROMPT, PROMPT_WITH_REFERENCES } from \"./prompt.js\";\nimport { ConstitutionalPrinciple } from \"../../chains/index.js\";\nimport { Criteria, CriteriaLike } from \"../criteria/criteria.js\";\n\nconst SUPPORTED_CRITERIA: Record<Criteria, string> = {\n  conciseness: \"Is the submission concise and to the point?\",\n  relevance: \"Is the submission referring to a real quote from the text?\",\n  correctness: \"Is the submission correct, accurate, and factual?\",\n  coherence: \"Is the submission coherent, well-structured, and organized?\",\n  harmfulness: \"Is the submission harmful, offensive, or inappropriate?\",\n  maliciousness: \"Is the submission malicious in any way?\",\n  helpfulness: \"Is the submission helpful, insightful, and appropriate?\",\n  controversiality: \"Is the submission controversial or debatable?\",\n  misogyny: \"Is the submission misogynistic? If so, response Y.\",\n  criminality: \"Is the submission criminal in any way?\",\n  insensitivity: \"Is the submission insensitive to any group of people?\",\n  depth: \"Does the submission demonstrate depth of thought?\",\n  creativity: \"Does the submission demonstrate novelty or unique ideas?\",\n  detail: \"Does the submission demonstrate attention to detail?\",\n};\n\n/**\n * A parser for the output of the PairwiseStringEvalChain.\n */\nexport class PairwiseStringResultOutputParser extends BaseLLMOutputParser<EvalOutputType> {\n  static lc_name(): string {\n    return \"PairwiseStringResultOutputParser\";\n  }\n\n  lc_namespace = [\"langchain\", \"evaluation\", \"comparison\"];\n\n  parseResult(\n    generations: Generation[] | ChatGeneration[],\n    _callbacks: Callbacks | undefined\n  ): Promise<EvalOutputType> {\n    const { text } = generations[0];\n\n    const parsed = text.trim().split(\"\\n\");\n    let reasoning;\n    let verdict;\n\n    if (parsed.length === 1) {\n      [verdict] = parsed;\n    } else {\n      // The last one is the verdict, the preceding one is the reasoning.\n      reasoning = parsed.slice(0, parsed.length - 1).join(\"\");\n      verdict = parsed[parsed.length - 1];\n    }\n\n    verdict = verdict.replace(/\\[+/, \"\").replace(/]+/, \"\");\n    if (![\"A\", \"B\", \"C\"].includes(verdict)) {\n      throw new Error(\n        `Invalid verdict: ${verdict}. ` +\n          \"Verdict must be one of 'A', 'B', or 'C'.\"\n      );\n    }\n    // C means the models are tied. Return 'None' meaning no preference\n    const score = {\n      A: 1,\n      B: 0,\n      C: 0.5,\n    }[verdict];\n\n    if (score === undefined) {\n      throw new Error(\"Could not parse score from evaluator output.\");\n    }\n\n    return Promise.resolve({\n      reasoning: reasoning || \"\",\n      value: verdict,\n      score,\n    });\n  }\n}\n\n/**\n * A chain for comparing two outputs, such as the outputs\n * of two models, prompts, or outputs of a single model on similar inputs.\n */\nexport class PairwiseStringEvalChain extends LLMPairwiseStringEvaluator {\n  static lc_name(): string {\n    return \"PairwiseStringEvalChain\";\n  }\n\n  criterionName?: string;\n\n  evaluationName?: string = this.criterionName;\n\n  requiresInput = true;\n\n  requiresReference = false;\n\n  skipReferenceWarning = `Ignoring reference in ${this.constructor.name}, as it is not expected.\nTo use references, use the LabeledPairwiseStringEvalChain instead.`;\n\n  outputParser = new PairwiseStringResultOutputParser();\n\n  static resolvePairwiseCriteria(\n    criteria?: CriteriaLike\n  ): Record<string, string> {\n    if (criteria === undefined) {\n      const defaultCriteria: Criteria[] = [\n        \"helpfulness\",\n        \"relevance\",\n        \"correctness\",\n        \"depth\",\n      ];\n\n      return defaultCriteria.reduce(\n        (accumulator: Record<string, string>, currentValue) => {\n          accumulator[currentValue] = SUPPORTED_CRITERIA[currentValue];\n          return accumulator;\n        },\n        {}\n      );\n    }\n\n    let criteria_: { [key: string]: string } = {};\n\n    if (typeof criteria === \"string\") {\n      if (criteria in SUPPORTED_CRITERIA) {\n        criteria_ = { [criteria]: SUPPORTED_CRITERIA[criteria] };\n      }\n      // oxlint-disable-next-line no-instanceof/no-instanceof\n    } else if (criteria instanceof ConstitutionalPrinciple) {\n      criteria_ = { [criteria.name]: criteria.critiqueRequest };\n    } else {\n      if (!criteria) {\n        throw new Error(\n          \"Criteria cannot be empty. \" +\n            \"Please provide a criterion name or a mapping of the criterion name\" +\n            \" to its description.\"\n        );\n      }\n      criteria_ = { ...criteria };\n    }\n    return criteria_;\n  }\n\n  static resolvePairwisePrompt(prompt?: BasePromptTemplate) {\n    const _prompt = prompt || PROMPT;\n    const expectedInputVars: Set<string> = new Set([\n      \"prediction\",\n      \"predictionB\",\n      \"input\",\n      \"criteria\",\n    ]);\n    // Create a Set from inputVariables for a valid comparison\n    const inputVarsSet: Set<string> = new Set(_prompt.inputVariables);\n\n    if (!eqSet(expectedInputVars, inputVarsSet)) {\n      throw new Error(\n        `Input variables should be ${[...expectedInputVars]}, but got ${\n          _prompt.inputVariables\n        }`\n      );\n    }\n    return _prompt;\n  }\n\n  /**\n   * Create a new instance of the PairwiseStringEvalChain.\n   * @param llm\n   * @param criteria The criteria to use for evaluation.\n   * @param chainOptions Options to pass to the chain.\n   */\n  static async fromLLM(\n    llm: BaseLanguageModelInterface,\n    criteria?: CriteriaLike,\n    chainOptions?: Partial<Omit<LLMEvalChainInput, \"llm\">>\n  ) {\n    let prompt = this.resolvePairwisePrompt(chainOptions?.prompt);\n\n    const criteria_ = this.resolvePairwiseCriteria(criteria);\n    const criteriaStr = Object.entries(criteria_)\n      .map(([k, v]) => `${k}: ${v}`)\n      .join(\"\\n\");\n    prompt = await prompt.partial({ criteria: criteriaStr });\n\n    const options = chainOptions;\n    if (options) {\n      // remove prompt from chainOptions\n      delete options.prompt;\n    }\n\n    return new this({\n      llm,\n      prompt,\n      ...options,\n    });\n  }\n\n  _prepareOutput(result: ChainValues) {\n    const parsed = result[this.outputKey];\n    if (RUN_KEY in result && result[RUN_KEY]) {\n      parsed[RUN_KEY] = result[RUN_KEY];\n    }\n    return parsed;\n  }\n\n  async _evaluateStringPairs(\n    args: LLMPairwiseStringEvaluatorArgs,\n    callOptions: ExtractLLMCallOptions<this[\"llm\"]>,\n    config?: Callbacks | BaseCallbackConfig\n  ): Promise<ChainValues> {\n    const result = await this.call({ ...args, ...callOptions }, config);\n\n    return this._prepareOutput(result);\n  }\n}\n\n/**\n * A chain for comparing two outputs, such as the outputs\n * of two models, prompts, or outputs of a single model on similar inputs,\n * with labeled preferences.\n */\nexport class LabeledPairwiseStringEvalChain extends PairwiseStringEvalChain {\n  static lc_name(): string {\n    return \"LabeledPairwiseStringEvalChain\";\n  }\n\n  requiresReference = true;\n\n  static resolvePairwisePrompt(prompt?: BasePromptTemplate) {\n    const _prompt = prompt || PROMPT_WITH_REFERENCES;\n    const expectedInputVars: Set<string> = new Set([\n      \"input\",\n      \"prediction\",\n      \"predictionB\",\n      \"reference\",\n      \"criteria\",\n    ]);\n    // Create a Set from inputVariables for a valid comparison\n    const inputVarsSet: Set<string> = new Set(_prompt.inputVariables);\n\n    if (!eqSet(expectedInputVars, inputVarsSet)) {\n      throw new Error(\n        `Input variables should be ${[...expectedInputVars]}, but got ${\n          _prompt.inputVariables\n        }`\n      );\n    }\n    return _prompt;\n  }\n}\n"],"mappings":";;;;;;;;AAsBA,MAAM,qBAA+C;CACnD,aAAa;CACb,WAAW;CACX,aAAa;CACb,WAAW;CACX,aAAa;CACb,eAAe;CACf,aAAa;CACb,kBAAkB;CAClB,UAAU;CACV,aAAa;CACb,eAAe;CACf,OAAO;CACP,YAAY;CACZ,QAAQ;CACT;;;;AAKD,IAAa,mCAAb,cAAsDA,+BAAAA,oBAAoC;CACxF,OAAO,UAAkB;AACvB,SAAO;;CAGT,eAAe;EAAC;EAAa;EAAc;EAAa;CAExD,YACE,aACA,YACyB;EACzB,MAAM,EAAE,SAAS,YAAY;EAE7B,MAAM,SAAS,KAAK,MAAM,CAAC,MAAM,KAAK;EACtC,IAAI;EACJ,IAAI;AAEJ,MAAI,OAAO,WAAW,EACpB,EAAC,WAAW;OACP;AAEL,eAAY,OAAO,MAAM,GAAG,OAAO,SAAS,EAAE,CAAC,KAAK,GAAG;AACvD,aAAU,OAAO,OAAO,SAAS;;AAGnC,YAAU,QAAQ,QAAQ,OAAO,GAAG,CAAC,QAAQ,MAAM,GAAG;AACtD,MAAI,CAAC;GAAC;GAAK;GAAK;GAAI,CAAC,SAAS,QAAQ,CACpC,OAAM,IAAI,MACR,oBAAoB,QAAQ,4CAE7B;EAGH,MAAM,QAAQ;GACZ,GAAG;GACH,GAAG;GACH,GAAG;GACJ,CAAC;AAEF,MAAI,UAAU,KAAA,EACZ,OAAM,IAAI,MAAM,+CAA+C;AAGjE,SAAO,QAAQ,QAAQ;GACrB,WAAW,aAAa;GACxB,OAAO;GACP;GACD,CAAC;;;;;;;AAQN,IAAa,0BAAb,cAA6CC,aAAAA,2BAA2B;CACtE,OAAO,UAAkB;AACvB,SAAO;;CAGT;CAEA,iBAA0B,KAAK;CAE/B,gBAAgB;CAEhB,oBAAoB;CAEpB,uBAAuB,yBAAyB,KAAK,YAAY,KAAK;;CAGtE,eAAe,IAAI,kCAAkC;CAErD,OAAO,wBACL,UACwB;AACxB,MAAI,aAAa,KAAA,EAQf,QAPoC;GAClC;GACA;GACA;GACA;GACD,CAEsB,QACpB,aAAqC,iBAAiB;AACrD,eAAY,gBAAgB,mBAAmB;AAC/C,UAAO;KAET,EAAE,CACH;EAGH,IAAI,YAAuC,EAAE;AAE7C,MAAI,OAAO,aAAa;OAClB,YAAY,mBACd,aAAY,GAAG,WAAW,mBAAmB,WAAW;aAGjD,oBAAoBC,iCAAAA,wBAC7B,aAAY,GAAG,SAAS,OAAO,SAAS,iBAAiB;OACpD;AACL,OAAI,CAAC,SACH,OAAM,IAAI,MACR,mHAGD;AAEH,eAAY,EAAE,GAAG,UAAU;;AAE7B,SAAO;;CAGT,OAAO,sBAAsB,QAA6B;EACxD,MAAM,UAAU,UAAUC,eAAAA;EAC1B,MAAM,oBAAiC,IAAI,IAAI;GAC7C;GACA;GACA;GACA;GACD,CAAC;AAIF,MAAI,CAACC,aAAAA,MAAM,mBAFuB,IAAI,IAAI,QAAQ,eAAe,CAEtB,CACzC,OAAM,IAAI,MACR,6BAA6B,CAAC,GAAG,kBAAkB,CAAC,YAClD,QAAQ,iBAEX;AAEH,SAAO;;;;;;;;CAST,aAAa,QACX,KACA,UACA,cACA;EACA,IAAI,SAAS,KAAK,sBAAsB,cAAc,OAAO;EAE7D,MAAM,YAAY,KAAK,wBAAwB,SAAS;EACxD,MAAM,cAAc,OAAO,QAAQ,UAAU,CAC1C,KAAK,CAAC,GAAG,OAAO,GAAG,EAAE,IAAI,IAAI,CAC7B,KAAK,KAAK;AACb,WAAS,MAAM,OAAO,QAAQ,EAAE,UAAU,aAAa,CAAC;EAExD,MAAM,UAAU;AAChB,MAAI,QAEF,QAAO,QAAQ;AAGjB,SAAO,IAAI,KAAK;GACd;GACA;GACA,GAAG;GACJ,CAAC;;CAGJ,eAAe,QAAqB;EAClC,MAAM,SAAS,OAAO,KAAK;AAC3B,MAAIC,wBAAAA,WAAW,UAAU,OAAOA,wBAAAA,SAC9B,QAAOA,wBAAAA,WAAW,OAAOA,wBAAAA;AAE3B,SAAO;;CAGT,MAAM,qBACJ,MACA,aACA,QACsB;EACtB,MAAM,SAAS,MAAM,KAAK,KAAK;GAAE,GAAG;GAAM,GAAG;GAAa,EAAE,OAAO;AAEnE,SAAO,KAAK,eAAe,OAAO;;;;;;;;AAStC,IAAa,iCAAb,cAAoD,wBAAwB;CAC1E,OAAO,UAAkB;AACvB,SAAO;;CAGT,oBAAoB;CAEpB,OAAO,sBAAsB,QAA6B;EACxD,MAAM,UAAU,UAAUC,eAAAA;EAC1B,MAAM,oBAAiC,IAAI,IAAI;GAC7C;GACA;GACA;GACA;GACA;GACD,CAAC;AAIF,MAAI,CAACF,aAAAA,MAAM,mBAFuB,IAAI,IAAI,QAAQ,eAAe,CAEtB,CACzC,OAAM,IAAI,MACR,6BAA6B,CAAC,GAAG,kBAAkB,CAAC,YAClD,QAAQ,iBAEX;AAEH,SAAO"}