/**
 * Evaluation Framework - Accuracy, Performance, and Reliability evaluation
 *
 * This file only declares types and signatures; the behavior described in
 * the comments below is limited to what the declarations themselves show.
 */

/** Outcome shared by every evaluation kind declared in this module. */
export interface EvalResult {
  /** Whether the evaluation was considered a pass. */
  passed: boolean;
  /** Numeric score for the evaluation (scale not visible here — TODO confirm). */
  score: number;
  /** Optional human-readable message accompanying the result. */
  message?: string;
  /**
   * Optional free-form extra data.
   * NOTE(review): value type is not recoverable from the declaration, so it
   * is typed `unknown`; narrow before use.
   */
  details?: Record<string, unknown>;
  /** Time taken by the evaluation (unit not visible here — presumably ms; confirm). */
  duration: number;
}

/** Input for {@link accuracyEval}. */
export interface AccuracyEvalConfig {
  /** The input the output was produced for. */
  input: string;
  /** The output that was expected. */
  expectedOutput: string;
  /** The output that was actually produced. */
  actualOutput: string;
  /** Optional threshold; its default and scale are defined by the implementation. */
  threshold?: number;
}

/** Input for {@link performanceEval}. */
export interface PerformanceEvalConfig {
  /**
   * Async function to be measured.
   * NOTE(review): the resolved value type is not recoverable from the
   * declaration; `unknown` accepts any async function.
   */
  func: () => Promise<unknown>;
  /** Optional number of iterations (default defined by the implementation). */
  iterations?: number;
  /** Optional number of warm-up runs (default defined by the implementation). */
  warmupRuns?: number;
}

/** {@link EvalResult} extended with timing statistics. */
export interface PerformanceResult extends EvalResult {
  /** Average time across the recorded runs. */
  avgTime: number;
  /** Smallest recorded time. */
  minTime: number;
  /** Largest recorded time. */
  maxTime: number;
  /** Presumably the 95th-percentile time — confirm against the implementation. */
  p95Time: number;
  /** The individual recorded times. */
  times: number[];
}

/** Input for {@link reliabilityEval}. */
export interface ReliabilityEvalConfig {
  /** Tool calls that were expected. */
  expectedToolCalls: string[];
  /** Tool calls that were actually made. */
  actualToolCalls: string[];
}

/**
 * Accuracy Evaluation - Compare actual output to expected
 *
 * @param config - Expected/actual outputs and optional threshold.
 * @returns A promise for the evaluation result.
 */
export declare function accuracyEval(config: AccuracyEvalConfig): Promise<EvalResult>;

/**
 * Performance Evaluation - Measure execution time
 *
 * @param config - The function to measure and optional run counts.
 * @returns A promise for the result including timing statistics.
 */
export declare function performanceEval(config: PerformanceEvalConfig): Promise<PerformanceResult>;

/**
 * Reliability Evaluation - Check tool call accuracy
 *
 * @param config - Expected and actual tool call names.
 * @returns A promise for the evaluation result.
 */
export declare function reliabilityEval(config: ReliabilityEvalConfig): Promise<EvalResult>;

/**
 * Eval Suite - Run multiple evaluations
 *
 * Each `run*` method takes a name plus the config of the corresponding
 * standalone evaluation function.
 */
export declare class EvalSuite {
  private results;

  /** Runs an accuracy evaluation under the given name. */
  runAccuracy(name: string, config: AccuracyEvalConfig): Promise<EvalResult>;

  /** Runs a performance evaluation under the given name. */
  runPerformance(name: string, config: PerformanceEvalConfig): Promise<PerformanceResult>;

  /** Runs a reliability evaluation under the given name. */
  runReliability(name: string, config: ReliabilityEvalConfig): Promise<EvalResult>;

  /**
   * Returns the collected results.
   * NOTE(review): keys are presumably the `name` passed to the `run*`
   * methods — confirm against the implementation.
   */
  getResults(): Map<string, EvalResult>;

  /** Returns aggregate counts and the average score. */
  getSummary(): {
    total: number;
    passed: number;
    failed: number;
    avgScore: number;
  };

  /** Prints the summary; the output destination is not visible in this declaration. */
  printSummary(): void;
}

export {
  Evaluator,
  createEvaluator,
  createDefaultEvaluator,
  relevanceCriterion,
  lengthCriterion,
  containsKeywordsCriterion,
  noHarmfulContentCriterion,
  type EvalCriteria,
  type EvalResult as BaseEvalResult,
  type EvalSummary,
  type EvaluatorConfig,
} from './base';

export {
  EvalResults,
  createEvalResults,
  type TestResult,
  type AggregatedResults,
  type TrendPoint,
} from './results';

export {
  Judge,
  AccuracyJudge,
  CriteriaJudge,
  RecipeJudge,
  addJudge,
  getJudge,
  listJudges,
  removeJudge,
  addOptimizationRule,
  getOptimizationRule,
  listOptimizationRules,
  removeOptimizationRule,
  parseJudgeResponse,
  type JudgeConfig,
  type JudgeCriteriaConfig,
  type JudgeResult,
  type JudgeRunOptions,
  type JudgeOptions,
  type JudgeProtocol,
} from './judge';