import type { ExpectedStep, Trajectory, TrajectoryComparisonOptions, TrajectoryExpectation } from '@mastra/core/evals';
import type { TrajectoryComparisonResult, TrajectoryEfficiencyResult, TrajectoryBlacklistResult, ToolFailureAnalysisResult } from '../../utils.js';
/**
 * Options accepted by `createTrajectoryAccuracyScorerCode`.
 * Both fields are optional.
 */
interface TrajectoryAccuracyScorerCodeOptions {
    /**
     * The expected trajectory to compare against.
     * Accepts a Trajectory (full trajectory steps) or ExpectedStep[] (lightweight matchers).
     * If not provided, the scorer will use `run.expectedTrajectory` from the dataset item.
     */
    expectedTrajectory?: Trajectory | ExpectedStep[];
    /** Comparison behavior options (e.g. `ordering`, `allowRepeatedSteps`; see the example on the factory). */
    comparisonOptions?: TrajectoryComparisonOptions;
}
/**
 * Creates a code-based trajectory accuracy scorer that compares the actual sequence
 * of tool calls an agent made against an expected trajectory.
 *
 * This scorer extracts the agent's tool call trajectory from its output messages
 * and compares it against a predefined expected trajectory. It supports strict,
 * relaxed, and unordered comparison modes.
 *
 * @param options - Configuration for the trajectory scorer. May be omitted entirely.
 * @returns A scorer with id `code-trajectory-accuracy-scorer`. Its
 * `preprocessStepResult` has one of two declared shapes:
 * - an error shape, where `error` is a string, `expectedTrajectory` and
 *   `comparison` are `undefined`, and `expectedStepNames` is an empty array;
 * - a success shape, where `expectedTrajectory.steps` holds the expected steps,
 *   `comparison` holds the detailed comparison result, and `error` is absent.
 *
 * Both shapes carry `actualTrajectory` and `actualStepNames`.
 * `generateScoreStepResult` is the numeric score.
 *
 * NOTE(review): the error shape is presumably produced when no expected trajectory
 * is available from either the options or the dataset item — confirm against the
 * implementation.
 *
 * @example
 * ```ts
 * import { createTrajectoryAccuracyScorerCode } from '@mastra/evals/scorers';
 *
 * const scorer = createTrajectoryAccuracyScorerCode({
 *   expectedTrajectory: {
 *     steps: [
 *       { stepType: 'tool_call', name: 'search' },
 *       { stepType: 'tool_call', name: 'summarize' },
 *     ],
 *   },
 *   comparisonOptions: {
 *     ordering: 'relaxed',
 *     allowRepeatedSteps: true,
 *   },
 * });
 *
 * const result = await scorer.run(agentRun);
 * // result.score: 0.0 - 1.0
 * // result.preprocessStepResult.comparison: detailed comparison results
 * ```
 */
export declare function createTrajectoryAccuracyScorerCode(options?: TrajectoryAccuracyScorerCodeOptions): import("@mastra/core/evals").MastraScorer<"code-trajectory-accuracy-scorer", import("@mastra/core/evals").ScorerRunInputForAgent, Trajectory, Record<"preprocessStepResult", {
    actualTrajectory: Trajectory;
    expectedTrajectory: undefined;
    comparison: undefined;
    actualStepNames: string[];
    expectedStepNames: never[];
    error: string;
} | {
    actualTrajectory: Trajectory;
    expectedTrajectory: {
        steps: ExpectedStep[];
    };
    comparison: TrajectoryComparisonResult;
    actualStepNames: string[];
    expectedStepNames: string[];
    error?: undefined;
}> & Record<"generateScoreStepResult", number>>;
/**
 * Result from evaluating a nested step's children against its TrajectoryExpectation.
 * The type is recursive: `nested` holds results of the same shape for deeper levels.
 */
export type NestedEvaluationResult = {
    /** Name of the expected step that contained the nested config */
    stepName: string;
    /** Score for this nested evaluation (0.0 - 1.0) */
    score: number;
    /** Accuracy result for the children */
    accuracy?: TrajectoryComparisonResult;
    /** Efficiency result for the children */
    efficiency?: TrajectoryEfficiencyResult;
    /** Blacklist result for the children */
    blacklist?: TrajectoryBlacklistResult;
    /** Tool failure result for the children */
    toolFailures?: ToolFailureAnalysisResult;
    /** Further nested results from deeper levels */
    nested?: NestedEvaluationResult[];
};
/**
 * Multi-dimensional result from the unified trajectory scorer.
 * Every dimension other than `score` is optional.
 */
export type TrajectoryScoreResult = {
    /** Overall score (0.0 - 1.0). Weighted combination of dimensions (0.0 if blacklist violation). */
    score: number;
    /** Accuracy sub-score (step matching). Only present if expected steps were provided. */
    accuracy?: TrajectoryComparisonResult;
    /** Efficiency sub-score (budgets + redundancy). */
    efficiency?: TrajectoryEfficiencyResult;
    /** Blacklist sub-score (forbidden tools/sequences). */
    blacklist?: TrajectoryBlacklistResult;
    /** Tool failure analysis. */
    toolFailures?: ToolFailureAnalysisResult;
    /** Results from evaluating nested step expectations. */
    nested?: NestedEvaluationResult[];
};
/**
 * Per-dimension weights used when combining sub-scores into the final score.
 * All fields are optional; the documented defaults (0.4 + 0.3 + 0.2 + 0.1) sum to 1.0.
 */
export interface TrajectoryScoreWeights {
    /** Weight for accuracy dimension (default: 0.4) */
    accuracy?: number;
    /** Weight for efficiency dimension (default: 0.3) */
    efficiency?: number;
    /** Weight for tool failures dimension (default: 0.2) */
    toolFailures?: number;
    /** Weight for blacklist dimension (default: 0.1) */
    blacklist?: number;
}
/**
 * Options accepted by `createTrajectoryScorerCode`.
 * Both fields are optional.
 */
export interface TrajectoryScorerCodeOptions {
    /**
     * Default expectation config for all runs.
     * Per-item `run.expectedTrajectory` values override these defaults.
     */
    defaults?: TrajectoryExpectation;
    /**
     * Weights for combining dimension scores into the final score.
     * Only active dimensions are used — weights are normalized to sum to 1.0.
     * Blacklist violations always override to 0 regardless of weight.
     */
    weights?: TrajectoryScoreWeights;
}
/**
 * Creates a unified trajectory scorer that evaluates multiple dimensions:
 * accuracy (step matching), efficiency (budgets, redundancy), blacklist (forbidden tools/sequences),
 * and tool failure patterns.
 *
 * Configuration can be set at two levels:
 * - **Constructor defaults** (`defaults`) — agent-level defaults for all dataset items
 * - **Per-item overrides** (`run.expectedTrajectory`) — prompt-specific overrides from dataset items
 *
 * Per-item values override constructor defaults for all fields.
 *
 * @param options - Constructor-level configuration: default trajectory expectations
 * (`defaults`) and dimension weights (`weights`). May be omitted entirely.
 * @returns A scorer with id `code-trajectory-scorer`. Its `preprocessStepResult`
 * carries the per-dimension results (`accuracy`, `efficiency`, `blacklist` and
 * `nested` may be `undefined`; `toolFailures` is always present) together with the
 * `config` that was evaluated. `generateScoreStepResult` is the numeric score and
 * `generateReasonStepResult` is a string.
 *
 * NOTE(review): `config` is presumably the expectation after per-item overrides are
 * merged onto `defaults` — confirm against the implementation.
 *
 * @example
 * ```ts
 * import { createTrajectoryScorerCode } from '@mastra/evals/scorers';
 *
 * const scorer = createTrajectoryScorerCode({
 *   defaults: {
 *     steps: [
 *       { stepType: 'tool_call', name: 'search' },
 *       { stepType: 'tool_call', name: 'summarize' },
 *     ],
 *     ordering: 'relaxed',
 *     maxSteps: 5,
 *     noRedundantCalls: true,
 *     blacklistedTools: ['deleteAll'],
 *   },
 *   weights: { accuracy: 0.5, efficiency: 0.3, toolFailures: 0.1, blacklist: 0.1 },
 * });
 * ```
 */
export declare function createTrajectoryScorerCode(options?: TrajectoryScorerCodeOptions): import("@mastra/core/evals").MastraScorer<"code-trajectory-scorer", import("@mastra/core/evals").ScorerRunInputForAgent, Trajectory, Record<"preprocessStepResult", {
    accuracy: TrajectoryComparisonResult | undefined;
    efficiency: TrajectoryEfficiencyResult | undefined;
    blacklist: TrajectoryBlacklistResult | undefined;
    toolFailures: ToolFailureAnalysisResult;
    nested: NestedEvaluationResult[] | undefined;
    config: TrajectoryExpectation;
}> & Record<"generateScoreStepResult", number> & Record<"generateReasonStepResult", string>>;
export {};
//# sourceMappingURL=index.d.ts.map