import { RunnableInterface } from "@langchain/core/runnables";
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { z } from "zod";
import { ChatCompletionMessage, EvaluatorResult, FewShotExample, ModelClient, SingleResultScorerReturnType } from "./types.js";
type ZodObjectAny = z.ZodObject<any, any, any, any>;
export declare function isZodSchema(input?: ZodObjectAny | Record<string, unknown>): input is ZodObjectAny;
export declare const _createLLMAsJudgeScorer: (params: {
    prompt: string | RunnableInterface | ((...args: unknown[]) => ChatCompletionMessage[] | Promise<ChatCompletionMessage[]>);
    system?: string | undefined;
    schema?: Record<string, unknown> | ZodObjectAny | undefined;
    judge?: ModelClient | BaseChatModel | undefined;
    model?: string | undefined;
    continuous?: boolean | undefined;
    choices?: number[] | undefined;
    useReasoning?: boolean | undefined;
    fewShotExamples?: FewShotExample[] | undefined;
}) => (params: {
    [key: string]: unknown;
    inputs?: unknown;
    outputs?: unknown;
    referenceOutputs?: unknown;
}) => Promise<SingleResultScorerReturnType>;
/**
 * Create an evaluator that uses an LLM to assess output quality based on specified criteria.
 *
 * @param params Configuration object with the following properties:
 * @param params.prompt The evaluation prompt - can be a string template, a LangChain prompt template,
 * or a function that returns a list of chat messages
 * @param params.feedbackKey Key used to store the evaluation result. Defaults to "score".
 * @param params.judge The LLM used for evaluation. Can be an OpenAI client or a LangChain model.
 * If an OpenAI client is passed, the "model" parameter must also be specified.
 * If omitted, "model" will be used to instantiate a LangChain model instance.
 * @param params.model Model identifier to use.
 * If "judge" is an OpenAI client, this should be a model name directly.
 * If "judge" is omitted, this must be a valid LangChain model identifier.
 * @param params.system Optional system message to prepend to the prompt
 * @param params.continuous If true, the score will be a float between 0 and 1.
 * If false, the score will be a boolean. Defaults to false.
 * @param params.choices Optional list of specific float values the score must be chosen from
 * @param params.useReasoning If true, includes an explanation for the score in the output.
 * Defaults to true.
 * @param params.fewShotExamples Optional list of example evaluations to append to the prompt
 * @param params.outputSchema Optional JSON schema or Zod schema for the output of the evaluator.
 * If provided, the created evaluator will return an object conforming to the provided schema.
 * If you are using an OpenAI client directly, this field must be in OpenAI structured output
 * format or JSON schema if provided.
 * @returns A function that takes inputs, outputs, referenceOutputs, and other arguments,
 * formats them into a prompt, invokes the judge, and returns an evaluation result
 *
 * @example
 * ```typescript
 * import { createLLMAsJudge } from "openevals";
 *
 * const evaluator = createLLMAsJudge({
 *   prompt: "Rate the quality of this response from 0 to 1: {outputs}",
 *   continuous: true,
 * });
 * const result = await evaluator({
 *   inputs: { question: "What color is the sky?" },
 *   outputs: { response: "Blue" },
 * });
 * ```
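 *
 * @example
 * A sketch (not taken from the package docs) of the structured-output overload, assuming
 * "openai:gpt-4o-mini" is a valid LangChain model identifier in your environment.
 * Passing `outputSchema` makes the evaluator resolve to an object matching that schema
 * instead of an `EvaluatorResult`:
 * ```typescript
 * import { createLLMAsJudge } from "openevals";
 * import { z } from "zod";
 *
 * // Hypothetical structured evaluator; swap in whichever model identifier you use.
 * const structuredEvaluator = createLLMAsJudge({
 *   prompt: "Grade the factual accuracy of this response: {outputs}",
 *   model: "openai:gpt-4o-mini",
 *   outputSchema: z.object({
 *     accurate: z.boolean(),
 *     justification: z.string(),
 *   }),
 * });
 * const structuredResult = await structuredEvaluator({
 *   inputs: { question: "What color is the sky?" },
 *   outputs: { response: "Blue" },
 * });
 * // structuredResult conforms to the schema, e.g. { accurate: true, justification: "..." }
 * ```
 *
 * @example
 * A similar sketch, assuming the official `openai` SDK client satisfies the `ModelClient`
 * interface described under "judge" and that `OPENAI_API_KEY` is set; `choices` restricts
 * the score to the listed values:
 * ```typescript
 * import { createLLMAsJudge } from "openevals";
 * import OpenAI from "openai";
 *
 * // When "judge" is an OpenAI client, "model" is a plain model name.
 * const gradedEvaluator = createLLMAsJudge({
 *   prompt: "How helpful is this response? {outputs}",
 *   judge: new OpenAI(),
 *   model: "gpt-4o-mini",
 *   choices: [0, 0.5, 1],
 * });
 * const graded = await gradedEvaluator({
 *   inputs: { question: "What color is the sky?" },
 *   outputs: { response: "Blue" },
 * });
 * ```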
 */
export declare function createLLMAsJudge(params: {
    prompt: string | RunnableInterface | ((...args: unknown[]) => ChatCompletionMessage[] | Promise<ChatCompletionMessage[]>);
    feedbackKey?: string;
    model?: string;
    system?: string;
    judge?: ModelClient | BaseChatModel;
    continuous?: boolean;
    choices?: number[];
    useReasoning?: boolean;
    fewShotExamples?: FewShotExample[];
    outputSchema?: undefined;
}): (params: Record<string, unknown>) => Promise<EvaluatorResult>;
export declare function createLLMAsJudge(params: {
    prompt: string | RunnableInterface | ((...args: unknown[]) => ChatCompletionMessage[] | Promise<ChatCompletionMessage[]>);
    feedbackKey?: string;
    model?: string;
    system?: string;
    judge?: ModelClient | BaseChatModel;
    continuous?: boolean;
    choices?: number[];
    useReasoning?: boolean;
    fewShotExamples?: FewShotExample[];
    outputSchema: Record<string, unknown> | ZodObjectAny;
}): (params: Record<string, unknown>) => Promise<Record<string, unknown>>;
export {};