import { Dataset, DatasetRow } from './dataset';
import { ColumnMapping } from './fn';
import { Op } from './opType';
import { WeaveObject, WeaveObjectParameters } from './weaveObject';

/**
 * Constructor options for {@link Evaluation}.
 *
 * @typeParam R - Row type of the dataset being evaluated.
 * @typeParam E - Row type handed to the scorers as `datasetRow`.
 * @typeParam M - Type of the model output handed to the scorers as `modelOutput`.
 */
interface EvaluationParameters<R extends DatasetRow, E extends DatasetRow, M> extends WeaveObjectParameters {
    /** Dataset whose rows are fed to the model. */
    dataset: Dataset<R>;
    /** Scorers, each called with a single `{ datasetRow, modelOutput }` argument. */
    scorers: WeaveCallable<(...args: [{
        datasetRow: E;
        modelOutput: M;
    }]) => any>[];
    /** NOTE(review): presumably an upper bound on concurrent work; the default is not visible in this declaration. */
    maxConcurrency?: number;
    /** NOTE(review): presumably relates dataset columns (R) to the row shape scorers expect (E); confirm against './fn'. */
    columnMapping?: ColumnMapping<R, E>;
}

/**
 * Object form of a callable: an `id` plus an `invoke` method that has the
 * same parameters and return type as the wrapped function type `T`.
 */
interface Runnable<T extends (...args: any[]) => any> {
    id: string;
    invoke: (...args: Parameters<T>) => ReturnType<T>;
}

/** Anything an evaluation can call: either an `Op` or a {@link Runnable} over the same function type. */
type WeaveCallable<T extends (...args: any[]) => any> = Op<T> | Runnable<T>;

/**
 * Sets up an evaluation which includes a set of scorers and a dataset.
 *
 * Calling evaluation.evaluate({ model }) will pass in rows from a dataset into a model matching
 * the names of the columns of the dataset to the argument names in model.predict.
 *
 * Then it will call all of the scorers and save the results in weave.
 *
 * @typeParam R - Row type of the dataset.
 * @typeParam E - Row type handed to the scorers as `datasetRow`.
 * @typeParam M - Type of the value the model produces.
 *
 * @example
 * // Collect your examples into a dataset
 * const dataset = new weave.Dataset({
 *   id: 'my-dataset',
 *   rows: [
 *     { question: 'What is the capital of France?', expected: 'Paris' },
 *     { question: 'Who wrote "To Kill a Mockingbird"?', expected: 'Harper Lee' },
 *     { question: 'What is the square root of 64?', expected: '8' },
 *   ],
 * });
 *
 * // Define any custom scoring function
 * const scoringFunction = weave.op(function isEqual({ modelOutput, datasetRow }) {
 *   return modelOutput == datasetRow.expected;
 * });
 *
 * // Define the function to evaluate
 * const model = weave.op(async function alwaysParisModel({ question }) {
 *   return 'Paris';
 * });
 *
 * // Start evaluating
 * const evaluation = new weave.Evaluation({
 *   id: 'my-evaluation',
 *   dataset: dataset,
 *   scorers: [scoringFunction],
 * });
 *
 * const results = await evaluation.evaluate({ model });
 */
export declare class Evaluation<R extends DatasetRow, E extends DatasetRow, M> extends WeaveObject {
    private dataset;
    private scorers;
    private columnMapping?;
    /**
     * @param parameters - Dataset, scorers and optional settings for this evaluation.
     */
    constructor(parameters: EvaluationParameters<R, E, M>);
    /**
     * Runs the model over the dataset and scores its outputs.
     *
     * @param model - Callable invoked with a single `{ datasetRow }` argument; resolves to the model output.
     * @param nTrials - Optional trial count. NOTE(review): default and exact semantics are not visible here.
     * @param maxConcurrency - Optional concurrency setting. NOTE(review): default is not visible here.
     * @returns A promise for the evaluation results.
     */
    evaluate({ model, nTrials, maxConcurrency, }: {
        model: WeaveCallable<(...args: [{
            datasetRow: R;
        }]) => Promise<M>>;
        nTrials?: number;
        maxConcurrency?: number;
        // NOTE(review): return type reconstructed as Promise<Record<string, any>>; confirm against evaluation.ts.
    }): Promise<Record<string, any>>;
    /**
     * Runs the model on one example and applies the scorers to its output.
     *
     * @param model - Callable invoked with a single `{ datasetRow }` argument; resolves to the model output.
     * @param example - The dataset row to evaluate.
     * @param columnMapping - Optional column mapping for this call.
     * @returns A promise for an object carrying `model_success`, `model_output`,
     *   `scores` (keyed by string) and `model_latency`.
     */
    predictAndScore({ model, example, columnMapping, }: {
        model: WeaveCallable<(...args: [{
            datasetRow: E;
        }]) => Promise<M>>;
        example: R;
        columnMapping?: ColumnMapping<R, E>;
    }): Promise<{
        model_success: boolean;
        model_output: any;
        scores: {
            [key: string]: any;
        };
        model_latency: number;
    }>;
    private summarizeResults;
    private summarizeColumn;
}
export {};
//# sourceMappingURL=evaluation.d.ts.map