{"version":3,"file":"config.cjs","names":[],"sources":["../../src/smith/config.ts"],"sourcesContent":["import { BaseLanguageModel } from \"@langchain/core/language_models/base\";\nimport { RunnableConfig } from \"@langchain/core/runnables\";\nimport { Example, Run } from \"langsmith\";\nimport { EvaluationResult, RunEvaluator } from \"langsmith/evaluation\";\nimport {\n  Criteria as CriteriaType,\n  type EmbeddingDistanceEvalChainInput,\n} from \"../evaluation/index.js\";\nimport { LoadEvaluatorOptions } from \"../evaluation/loader.js\";\nimport { EvaluatorType } from \"../evaluation/types.js\";\n\nexport type EvaluatorInputs = {\n  input?: string | unknown;\n  prediction: string | unknown;\n  reference?: string | unknown;\n};\n\nexport type EvaluatorInputFormatter = ({\n  rawInput,\n  rawPrediction,\n  rawReferenceOutput,\n  run,\n}: {\n  // oxlint-disable-next-line @typescript-eslint/no-explicit-any\n  rawInput: any;\n  // oxlint-disable-next-line @typescript-eslint/no-explicit-any\n  rawPrediction: any;\n  // oxlint-disable-next-line @typescript-eslint/no-explicit-any\n  rawReferenceOutput?: any;\n  run: Run;\n}) => EvaluatorInputs;\n\nexport type DynamicRunEvaluatorParams<\n  // oxlint-disable-next-line @typescript-eslint/no-explicit-any\n  Input extends Record<string, any> = Record<string, unknown>,\n  // oxlint-disable-next-line @typescript-eslint/no-explicit-any\n  Prediction extends Record<string, any> = Record<string, unknown>,\n  // oxlint-disable-next-line @typescript-eslint/no-explicit-any\n  Reference extends Record<string, any> = Record<string, unknown>,\n> = {\n  input: Input;\n  prediction?: Prediction;\n  reference?: Reference;\n  run: Run;\n  example?: Example;\n};\n\n/**\n * Type of a function that can be coerced into a RunEvaluator function.\n * While we have the class-based RunEvaluator, it's often more convenient to directly\n * pass a function to the runner. This type allows us to do that.\n */\nexport type RunEvaluatorLike =\n  | ((\n      props: DynamicRunEvaluatorParams,\n      options: RunnableConfig\n    ) => Promise<EvaluationResult>)\n  | ((\n      props: DynamicRunEvaluatorParams,\n      options: RunnableConfig\n    ) => EvaluationResult);\n\nexport function isOffTheShelfEvaluator<\n  T extends keyof EvaluatorType,\n  U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike,\n>(evaluator: T | EvalConfig | U): evaluator is T | EvalConfig {\n  return typeof evaluator === \"string\" || \"evaluatorType\" in evaluator;\n}\n\nexport function isCustomEvaluator<\n  T extends keyof EvaluatorType,\n  U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike,\n>(evaluator: T | EvalConfig | U): evaluator is U {\n  return !isOffTheShelfEvaluator(evaluator);\n}\n\nexport type RunEvalType<\n  T extends keyof EvaluatorType =\n    | \"criteria\"\n    | \"labeled_criteria\"\n    | \"embedding_distance\",\n  U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike,\n> = T | EvalConfig | U;\n\n/**\n * Configuration class for running evaluations on datasets.\n *\n * @remarks\n * RunEvalConfig in LangSmith is a configuration class for running evaluations on datasets. Its primary purpose is to define the parameters and evaluators that will be applied during the evaluation of a dataset. This configuration can include various evaluators, custom evaluators, and different keys for inputs, predictions, and references.\n *\n * @typeparam T - The type of evaluators.\n * @typeparam U - The type of custom evaluators.\n */\nexport type RunEvalConfig<\n  T extends keyof EvaluatorType =\n    | \"criteria\"\n    | \"labeled_criteria\"\n    | \"embedding_distance\",\n  U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike,\n> = {\n  /**\n   * Evaluators to apply to a dataset run.\n   * You can optionally specify these by name, or by\n   * configuring them with an EvalConfig object.\n   */\n  evaluators?: RunEvalType<T, U>[];\n\n  /**\n   * Convert the evaluation data into formats that can be used by the evaluator.\n   * This should most commonly be a string.\n   * Parameters are the raw input from the run, the raw output, raw reference output, and the raw run.\n   * @example\n   * ```ts\n   * // Chain input: { input: \"some string\" }\n   * // Chain output: { output: \"some output\" }\n   * // Reference example output format: { output: \"some reference output\" }\n   * const formatEvaluatorInputs = ({\n   *   rawInput,\n   *   rawPrediction,\n   *   rawReferenceOutput,\n   * }) => {\n   *   return {\n   *     input: rawInput.input,\n   *     prediction: rawPrediction.output,\n   *     reference: rawReferenceOutput.output,\n   *   };\n   * };\n   * ```\n   * @returns The prepared data.\n   */\n  formatEvaluatorInputs?: EvaluatorInputFormatter;\n\n  /**\n   * Custom evaluators to apply to a dataset run.\n   * Each evaluator is provided with a run trace containing the model\n   * outputs, as well as an \"example\" object representing a record\n   * in the dataset.\n   *\n   * @deprecated Use `evaluators` instead.\n   */\n  customEvaluators?: U[];\n};\n\nexport interface EvalConfig extends LoadEvaluatorOptions {\n  /**\n   * The name of the evaluator to use.\n   * Example: labeled_criteria, criteria, etc.\n   */\n  evaluatorType: keyof EvaluatorType;\n\n  /**\n   * The feedback (or metric) name to use for the logged\n   * evaluation results. If none provided, we default to\n   * the evaluationName.\n   */\n  feedbackKey?: string;\n\n  /**\n   * Convert the evaluation data into formats that can be used by the evaluator.\n   * This should most commonly be a string.\n   * Parameters are the raw input from the run, the raw output, raw reference output, and the raw run.\n   * @example\n   * ```ts\n   * // Chain input: { input: \"some string\" }\n   * // Chain output: { output: \"some output\" }\n   * // Reference example output format: { output: \"some reference output\" }\n   * const formatEvaluatorInputs = ({\n   *   rawInput,\n   *   rawPrediction,\n   *   rawReferenceOutput,\n   * }) => {\n   *   return {\n   *     input: rawInput.input,\n   *     prediction: rawPrediction.output,\n   *     reference: rawReferenceOutput.output,\n   *   };\n   * };\n   * ```\n   * @returns The prepared data.\n   */\n  formatEvaluatorInputs: EvaluatorInputFormatter;\n}\n\nconst isStringifiableValue = (\n  value: unknown\n): value is string | number | boolean | bigint =>\n  typeof value === \"string\" ||\n  typeof value === \"number\" ||\n  typeof value === \"boolean\" ||\n  typeof value === \"bigint\";\n\nconst getSingleStringifiedValue = (value: unknown) => {\n  if (isStringifiableValue(value)) {\n    return `${value}`;\n  }\n\n  if (typeof value === \"object\" && value != null && !Array.isArray(value)) {\n    const entries = Object.entries(value);\n\n    if (entries.length === 1 && isStringifiableValue(entries[0][1])) {\n      return `${entries[0][1]}`;\n    }\n  }\n\n  console.warn(\"Non-stringifiable value found when coercing\", value);\n  return `${value}`;\n};\n\n/**\n * Configuration to load a \"CriteriaEvalChain\" evaluator,\n * which prompts an LLM to determine whether the model's\n * prediction complies with the provided criteria.\n * @param criteria - The criteria to use for the evaluator.\n * @param llm - The language model to use for the evaluator.\n * @returns The configuration for the evaluator.\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [Criteria(\"helpfulness\")],\n * };\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [\n *     Criteria({\n *       \"isCompliant\": \"Does the submission comply with the requirements of XYZ\"\n *     })\n *   ],\n * };\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [{\n *     evaluatorType: \"criteria\",\n *     criteria: \"helpfulness\"\n *     formatEvaluatorInputs: ...\n *   }]\n * };\n * ```\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [{\n *     evaluatorType: \"criteria\",\n *     criteria: { \"isCompliant\": \"Does the submission comply with the requirements of XYZ\" },\n *     formatEvaluatorInputs: ...\n *   }]\n * };\n */\nexport type Criteria = EvalConfig & {\n  evaluatorType: \"criteria\";\n\n  /**\n   * The \"criteria\" to insert into the prompt template\n   * used for evaluation. See the prompt at\n   * https://smith.langchain.com/hub/langchain-ai/criteria-evaluator\n   * for more information.\n   */\n  criteria?: CriteriaType | Record<string, string>;\n\n  /**\n   * The language model to use as the evaluator, defaults to GPT-4\n   */\n  llm?: BaseLanguageModel;\n};\n\n// for compatibility reasons\nexport type CriteriaEvalChainConfig = Criteria;\n\nexport function Criteria(\n  criteria: CriteriaType | Record<string, string>,\n  config?: Pick<\n    Partial<LabeledCriteria>,\n    \"formatEvaluatorInputs\" | \"llm\" | \"feedbackKey\"\n  >\n): EvalConfig {\n  const formatEvaluatorInputs =\n    config?.formatEvaluatorInputs ??\n    ((payload) => ({\n      prediction: getSingleStringifiedValue(payload.rawPrediction),\n      input: getSingleStringifiedValue(payload.rawInput),\n    }));\n\n  if (typeof criteria !== \"string\" && Object.keys(criteria).length !== 1) {\n    throw new Error(\n      \"Only one criteria key is allowed when specifying custom criteria.\"\n    );\n  }\n\n  const criteriaKey =\n    typeof criteria === \"string\" ? criteria : Object.keys(criteria)[0];\n\n  return {\n    evaluatorType: \"criteria\",\n    criteria,\n    feedbackKey: config?.feedbackKey ?? criteriaKey,\n    llm: config?.llm,\n    formatEvaluatorInputs,\n  };\n}\n\n/**\n * Configuration to load a \"LabeledCriteriaEvalChain\" evaluator,\n * which prompts an LLM to determine whether the model's\n * prediction complies with the provided criteria and also\n * provides a \"ground truth\" label for the evaluator to incorporate\n * in its evaluation.\n * @param criteria - The criteria to use for the evaluator.\n * @param llm - The language model to use for the evaluator.\n * @returns The configuration for the evaluator.\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [LabeledCriteria(\"correctness\")],\n * };\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [\n *     LabeledCriteria({\n *       \"mentionsAllFacts\": \"Does the include all facts provided in the reference?\"\n *     })\n *   ],\n * };\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [{\n *     evaluatorType: \"labeled_criteria\",\n *     criteria: \"correctness\",\n *     formatEvaluatorInputs: ...\n *   }],\n * };\n * ```\n * @example\n * ```ts\n * const evalConfig = {\n *   evaluators: [{\n *     evaluatorType: \"labeled_criteria\",\n *     criteria: { \"mentionsAllFacts\": \"Does the include all facts provided in the reference?\" },\n *     formatEvaluatorInputs: ...\n *   }],\n * };\n */\nexport type LabeledCriteria = EvalConfig & {\n  evaluatorType: \"labeled_criteria\";\n\n  /**\n   * The \"criteria\" to insert into the prompt template\n   * used for evaluation. See the prompt at\n   * https://smith.langchain.com/hub/langchain-ai/labeled-criteria\n   * for more information.\n   */\n  criteria?: CriteriaType | Record<string, string>;\n\n  /**\n   * The language model to use as the evaluator, defaults to GPT-4\n   */\n  llm?: BaseLanguageModel;\n};\n\nexport function LabeledCriteria(\n  criteria: CriteriaType | Record<string, string>,\n  config?: Pick<\n    Partial<LabeledCriteria>,\n    \"formatEvaluatorInputs\" | \"llm\" | \"feedbackKey\"\n  >\n): LabeledCriteria {\n  const formatEvaluatorInputs =\n    config?.formatEvaluatorInputs ??\n    ((payload) => ({\n      prediction: getSingleStringifiedValue(payload.rawPrediction),\n      input: getSingleStringifiedValue(payload.rawInput),\n      reference: getSingleStringifiedValue(payload.rawReferenceOutput),\n    }));\n\n  if (typeof criteria !== \"string\" && Object.keys(criteria).length !== 1) {\n    throw new Error(\n      \"Only one labeled criteria key is allowed when specifying custom criteria.\"\n    );\n  }\n\n  const criteriaKey =\n    typeof criteria === \"string\" ? criteria : Object.keys(criteria)[0];\n\n  return {\n    evaluatorType: \"labeled_criteria\",\n    criteria,\n    feedbackKey: config?.feedbackKey ?? criteriaKey,\n    llm: config?.llm,\n    formatEvaluatorInputs,\n  };\n}\n\n/**\n * Configuration to load a \"EmbeddingDistanceEvalChain\" evaluator,\n * which embeds distances to score semantic difference between\n * a prediction and reference.\n */\nexport type EmbeddingDistance = EvalConfig &\n  EmbeddingDistanceEvalChainInput & { evaluatorType: \"embedding_distance\" };\n\nexport function EmbeddingDistance(\n  distanceMetric: EmbeddingDistanceEvalChainInput[\"distanceMetric\"],\n  config?: Pick<\n    Partial<LabeledCriteria>,\n    \"formatEvaluatorInputs\" | \"embedding\" | \"feedbackKey\"\n  >\n): EmbeddingDistance {\n  const formatEvaluatorInputs =\n    config?.formatEvaluatorInputs ??\n    ((payload) => ({\n      prediction: getSingleStringifiedValue(payload.rawPrediction),\n      reference: getSingleStringifiedValue(payload.rawReferenceOutput),\n    }));\n\n  return {\n    evaluatorType: \"embedding_distance\",\n    embedding: config?.embedding,\n    distanceMetric,\n    feedbackKey: config?.feedbackKey ?? \"embedding_distance\",\n    formatEvaluatorInputs,\n  };\n}\n"],"mappings":";AA8DA,SAAgB,uBAGd,WAA4D;AAC5D,QAAO,OAAO,cAAc,YAAY,mBAAmB;;AAG7D,SAAgB,kBAGd,WAA+C;AAC/C,QAAO,CAAC,uBAAuB,UAAU;;AA8G3C,MAAM,wBACJ,UAEA,OAAO,UAAU,YACjB,OAAO,UAAU,YACjB,OAAO,UAAU,aACjB,OAAO,UAAU;AAEnB,MAAM,6BAA6B,UAAmB;AACpD,KAAI,qBAAqB,MAAM,CAC7B,QAAO,GAAG;AAGZ,KAAI,OAAO,UAAU,YAAY,SAAS,QAAQ,CAAC,MAAM,QAAQ,MAAM,EAAE;EACvE,MAAM,UAAU,OAAO,QAAQ,MAAM;AAErC,MAAI,QAAQ,WAAW,KAAK,qBAAqB,QAAQ,GAAG,GAAG,CAC7D,QAAO,GAAG,QAAQ,GAAG;;AAIzB,SAAQ,KAAK,+CAA+C,MAAM;AAClE,QAAO,GAAG;;AAgEZ,SAAgB,SACd,UACA,QAIY;CACZ,MAAM,wBACJ,QAAQ,2BACN,aAAa;EACb,YAAY,0BAA0B,QAAQ,cAAc;EAC5D,OAAO,0BAA0B,QAAQ,SAAS;EACnD;AAEH,KAAI,OAAO,aAAa,YAAY,OAAO,KAAK,SAAS,CAAC,WAAW,EACnE,OAAM,IAAI,MACR,oEACD;CAGH,MAAM,cACJ,OAAO,aAAa,WAAW,WAAW,OAAO,KAAK,SAAS,CAAC;AAElE,QAAO;EACL,eAAe;EACf;EACA,aAAa,QAAQ,eAAe;EACpC,KAAK,QAAQ;EACb;EACD;;AA+DH,SAAgB,gBACd,UACA,QAIiB;CACjB,MAAM,wBACJ,QAAQ,2BACN,aAAa;EACb,YAAY,0BAA0B,QAAQ,cAAc;EAC5D,OAAO,0BAA0B,QAAQ,SAAS;EAClD,WAAW,0BAA0B,QAAQ,mBAAmB;EACjE;AAEH,KAAI,OAAO,aAAa,YAAY,OAAO,KAAK,SAAS,CAAC,WAAW,EACnE,OAAM,IAAI,MACR,4EACD;CAGH,MAAM,cACJ,OAAO,aAAa,WAAW,WAAW,OAAO,KAAK,SAAS,CAAC;AAElE,QAAO;EACL,eAAe;EACf;EACA,aAAa,QAAQ,eAAe;EACpC,KAAK,QAAQ;EACb;EACD;;AAWH,SAAgB,kBACd,gBACA,QAImB;CACnB,MAAM,wBACJ,QAAQ,2BACN,aAAa;EACb,YAAY,0BAA0B,QAAQ,cAAc;EAC5D,WAAW,0BAA0B,QAAQ,mBAAmB;EACjE;AAEH,QAAO;EACL,eAAe;EACf,WAAW,QAAQ;EACnB;EACA,aAAa,QAAQ,eAAe;EACpC;EACD"}