import type { MastraDBMessage } from '@mastra/core/agent';
import type { ExpectedStep, ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput, TrajectoryExpectation, Trajectory } from '@mastra/core/evals';
import { RequestContext } from '@mastra/core/request-context';
/**
 * Union of the run-input shapes declared for LLM-judge scorers: the agent run
 * input, a bare string, or a loosely typed object carrying any of the optional
 * fields below (plus arbitrary extra keys).
 */
export type ScorerRunInputForLLMJudge = ScorerRunInputForAgent | string | {
    inputMessages?: unknown[];
    messages?: unknown[];
    prompt?: string;
    text?: string;
    content?: unknown;
    input?: unknown;
    user?: unknown;
    [key: string]: unknown;
};
/**
 * Union of the run-output shapes declared for LLM-judge scorers: the agent run
 * output, a bare string, an array of unknown items, or a loosely typed object
 * carrying any of the optional fields below (plus arbitrary extra keys).
 */
export type ScorerRunOutputForLLMJudge = ScorerRunOutputForAgent | string | unknown[] | {
    text?: string;
    content?: unknown;
    role?: string;
    [key: string]: unknown;
};
/**
 * Extracts text content from a MastraDBMessage or ModelMessage-like object.
 *
 * @param message - The message to extract text from
 * @returns The extracted text content, or an empty string if no text is found
 *
 * @example
 * ```ts
 * const message: MastraDBMessage = {
 *   id: 'msg-1',
 *   role: 'assistant',
 *   content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },
 *   createdAt: new Date(),
 * };
 * const text = getTextContentFromMastraDBMessage(message); // 'Hello!'
 * ```
 */
export declare function getTextContentFromMastraDBMessage(message: MastraDBMessage): string;
/**
 * Type guard that narrows an unknown value to `ScorerRunInputForAgent`.
 *
 * @param input - The value to test
 * @returns `true` when `input` is a `ScorerRunInputForAgent`
 */
export declare const isScorerRunInputForAgent: (input: unknown) => input is ScorerRunInputForAgent;
/**
 * Type guard that narrows an unknown value to `ScorerRunOutputForAgent`.
 *
 * @param output - The value to test
 * @returns `true` when `output` is a `ScorerRunOutputForAgent`
 */
export declare const isScorerRunOutputForAgent: (output: unknown) => output is ScorerRunOutputForAgent;
/**
 * Rounds a number to two decimal places.
 *
 * Uses `Number.EPSILON` to handle floating-point precision issues.
 *
 * @param num - The number to round
 * @returns The number rounded to two decimal places
 *
 * @example
 * ```ts
 * roundToTwoDecimals(0.1 + 0.2); // 0.3
 * roundToTwoDecimals(1.005); // 1.01
 * ```
 */
export declare const roundToTwoDecimals: (num: number) => number;
/**
 * Determines if a value is closer to the first target than the second.
 *
 * @param value - The value to compare
 * @param target1 - The first target value
 * @param target2 - The second target value
 * @returns `true` if `value` is closer to `target1` than `target2`
 *
 * @example
 * ```ts
 * isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)
 * isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)
 * ```
 */
export declare function isCloserTo(value: number, target1: number, target2: number): boolean;
/**
 * Represents a test case for scorer evaluation.
 */
export type TestCase = {
    /** The input text to evaluate */
    input: string;
    /** The output text to evaluate */
    output: string;
    /** The expected result of the evaluation */
    expectedResult: {
        /** The expected score */
        score: number;
        /** The optional expected reason */
        reason?: string;
    };
};
/**
 * Represents a test case with additional context for scorer evaluation.
 */
export type TestCaseWithContext = TestCase & {
    /** Additional context strings for the evaluation */
    context: string[];
};
/**
 * Creates a scoring input object for testing purposes.
 *
 * @param input - The user input text
 * @param output - The assistant output text
 * @param additionalContext - Optional additional context data
 * @param requestContext - Optional request context data
 * @returns A ScoringInput object ready for use in scorer tests
 *
 * @example
 * ```ts
 * const run = createTestRun(
 *   'What is 2+2?',
 *   'The answer is 4.',
 *   { topic: 'math' }
 * );
 * ```
 */
export declare const createTestRun: (input: string, output: string, additionalContext?: Record<string, any>, requestContext?: Record<string, any>) => ScoringInput;
/**
 * Extracts the user message text from a scorer run input.
 *
 * Accepts the agent shape (`{ inputMessages }`), `ModelMessage[]`
 * (`{ messages }`), workflow input (`{ prompt }`), and a bare string.
 *
 * @param input - The scorer run input
 * @returns The user message text, or `undefined` if none can be extracted
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const userText = getUserMessageFromRunInput(run.input);
 *     return { userText };
 *   });
 * ```
 */
export declare const getUserMessageFromRunInput: (input?: unknown) => string | undefined;
/**
 * Extracts all system messages from a scorer run input.
 *
 * Collects text from both standard system messages and tagged system messages
 * (specialized system prompts like memory instructions).
 *
 * @param input - The scorer run input containing system messages
 * @returns An array of system message strings
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const systemMessages = getSystemMessagesFromRunInput(run.input);
 *     return { systemPrompt: systemMessages.join('\n') };
 *   });
 * ```
 */
export declare const getSystemMessagesFromRunInput: (input?: unknown) => string[];
/**
 * Combines all system messages into a single prompt string.
 *
 * Joins all system messages (standard and tagged) with double newlines.
 *
 * @param input - The scorer run input containing system messages
 * @returns A combined system prompt string
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const systemPrompt = getCombinedSystemPrompt(run.input);
 *     return { systemPrompt };
 *   });
 * ```
 */
export declare const getCombinedSystemPrompt: (input?: unknown) => string;
/**
 * Extracts the assistant message text from a scorer run output.
 *
 * Accepts the agent shape (`MastraDBMessage[]` / `ModelMessage[]`), workflow
 * output (`{ text }`), task output (`{ content }`), a single assistant message
 * object, and a bare string.
 *
 * @param output - The scorer run output
 * @returns The assistant message text, or `undefined` if none can be extracted
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const response = getAssistantMessageFromRunOutput(run.output);
 *     return { response };
 *   });
 * ```
 */
export declare const getAssistantMessageFromRunOutput: (output?: unknown) => string | undefined;
/**
 * Extracts reasoning text from a scorer run output.
 *
 * This function extracts reasoning content from assistant messages, which is
 * produced by reasoning models like `deepseek-reasoner`. The reasoning can be
 * stored in two places:
 * 1. `content.reasoning` - a string field on the message content
 * 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`
 *
 * @param output - The scorer run output (array of MastraDBMessage)
 * @returns The reasoning text, or `undefined` if no reasoning is present
 *
 * @example
 * ```ts
 * const reasoningScorer = createScorer({
 *   id: 'reasoning-scorer',
 *   name: 'Reasoning Quality',
 *   description: 'Evaluates the quality of model reasoning',
 *   type: 'agent',
 * })
 *   .preprocess(({ run }) => {
 *     const reasoning = getReasoningFromRunOutput(run.output);
 *     const response = getAssistantMessageFromRunOutput(run.output);
 *     return { reasoning, response };
 *   })
 *   .generateScore(({ results }) => {
 *     // Score based on reasoning quality
 *     return results.preprocessStepResult?.reasoning ? 1 : 0;
 *   });
 * ```
 */
export declare const getReasoningFromRunOutput: (output?: ScorerRunOutputForAgent) => string | undefined;
/**
 * Creates a tool invocation object for testing purposes.
 *
 * @param options - The tool invocation configuration
 * @param options.toolCallId - Unique identifier for the tool call
 * @param options.toolName - Name of the tool being called
 * @param options.args - Arguments passed to the tool
 * @param options.result - Result returned by the tool
 * @param options.state - State of the invocation (default: 'result')
 * @returns A tool invocation object
 *
 * @example
 * ```ts
 * const invocation = createToolInvocation({
 *   toolCallId: 'call-123',
 *   toolName: 'weatherTool',
 *   args: { location: 'London' },
 *   result: { temperature: 20, condition: 'sunny' },
 * });
 * ```
 */
export declare const createToolInvocation: ({ toolCallId, toolName, args, result, state, }: {
    toolCallId: string;
    toolName: string;
    args: Record<string, any>;
    result: Record<string, any>;
    state?: "call" | "partial-call" | "result";
}) => {
    toolCallId: string;
    toolName: string;
    args: Record<string, any>;
    result: Record<string, any>;
    state: string;
};
/**
 * Creates a MastraDBMessage object for testing purposes.
 *
 * Supports optional tool invocations for testing tool call scenarios.
 *
 * @param options - The message configuration
 * @param options.content - The text content of the message
 * @param options.role - The role of the message sender ('user', 'assistant', or 'system')
 * @param options.id - Optional message ID (default: 'test-message')
 * @param options.toolInvocations - Optional array of tool invocations
 * @returns A MastraDBMessage object
 *
 * @example
 * ```ts
 * const message = createTestMessage({
 *   content: 'Hello, how can I help?',
 *   role: 'assistant',
 * });
 *
 * // With tool invocations
 * const messageWithTools = createTestMessage({
 *   content: 'Let me check the weather.',
 *   role: 'assistant',
 *   toolInvocations: [{
 *     toolCallId: 'call-1',
 *     toolName: 'weatherTool',
 *     args: { location: 'Paris' },
 *     result: { temp: 22 },
 *     state: 'result',
 *   }],
 * });
 * ```
 */
export declare function createTestMessage({ content, role, id, toolInvocations, }: {
    content: string;
    role: 'user' | 'assistant' | 'system';
    id?: string;
    toolInvocations?: Array<{
        toolCallId: string;
        toolName: string;
        args: Record<string, any>;
        result: Record<string, any>;
        state: any;
    }>;
}): MastraDBMessage;
/**
 * Creates a complete agent test run object for testing scorers.
 *
 * Provides a convenient way to construct the full run object that scorers receive,
 * including input messages, output, system messages, and request context.
 *
 * @param options - The test run configuration
 * @param options.inputMessages - Array of input messages (default: [])
 * @param options.output - The output messages (required)
 * @param options.rememberedMessages - Array of remembered messages from memory (default: [])
 * @param options.systemMessages - Array of system messages (default: [])
 * @param options.taggedSystemMessages - Tagged system messages map (default: {})
 * @param options.requestContext - Request context (default: new RequestContext())
 * @param options.runId - Unique run ID (default: random UUID)
 * @returns A complete test run object
 *
 * @example
 * ```ts
 * const testRun = createAgentTestRun({
 *   inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],
 *   output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],
 * });
 *
 * const result = await scorer.run({
 *   input: testRun.input,
 *   output: testRun.output,
 * });
 * ```
 */
export declare const createAgentTestRun: ({ inputMessages, output, rememberedMessages, systemMessages, taggedSystemMessages, requestContext, runId, }: {
    inputMessages?: ScorerRunInputForAgent["inputMessages"];
    output: ScorerRunOutputForAgent;
    rememberedMessages?: ScorerRunInputForAgent["rememberedMessages"];
    systemMessages?: ScorerRunInputForAgent["systemMessages"];
    taggedSystemMessages?: ScorerRunInputForAgent["taggedSystemMessages"];
    requestContext?: RequestContext;
    runId?: string;
}) => {
    input: ScorerRunInputForAgent;
    output: ScorerRunOutputForAgent;
    requestContext: RequestContext;
    runId: string;
};
/**
 * Creates a test run for trajectory scorers where `output` is a `Trajectory`
 * (pre-extracted by the `runEvals` pipeline).
 *
 * @example
 * ```ts
 * const testRun = createTrajectoryTestRun({
 *   inputMessages: [createTestMessage({ content: 'Do X', role: 'user', id: 'u1' })],
 *   trajectory: {
 *     steps: [
 *       { stepType: 'tool_call', name: 'search', toolArgs: { q: 'test' } },
 *     ],
 *   },
 * });
 * ```
 */
export declare const createTrajectoryTestRun: ({ inputMessages, trajectory, rememberedMessages, systemMessages, taggedSystemMessages, requestContext, runId, expectedTrajectory, }: {
    inputMessages?: ScorerRunInputForAgent["inputMessages"];
    trajectory: Trajectory;
    rememberedMessages?: ScorerRunInputForAgent["rememberedMessages"];
    systemMessages?: ScorerRunInputForAgent["systemMessages"];
    taggedSystemMessages?: ScorerRunInputForAgent["taggedSystemMessages"];
    requestContext?: RequestContext;
    runId?: string;
    expectedTrajectory?: TrajectoryExpectation;
}) => {
    input: ScorerRunInputForAgent;
    output: Trajectory;
    requestContext: RequestContext;
    runId: string;
    expectedTrajectory?: TrajectoryExpectation;
};
/**
 * Information about a tool call extracted from scorer output.
 */
export type ToolCallInfo = {
    /** Name of the tool that was called */
    toolName: string;
    /** Unique identifier for the tool call */
    toolCallId: string;
    /** Index of the message containing this tool call */
    messageIndex: number;
    /** Index of the invocation within the message's tool invocations */
    invocationIndex: number;
};
/**
 * Extracts all tool calls from a scorer run output.
 *
 * Iterates through all messages and their tool invocations to collect
 * information about tools that were called (with state 'result' or 'call').
 *
 * @param output - The scorer run output (array of MastraDBMessage)
 * @returns An object containing tool names and detailed tool call info
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const { tools, toolCallInfos } = extractToolCalls(run.output);
 *     return {
 *       toolsUsed: tools,
 *       toolCount: tools.length,
 *     };
 *   });
 * ```
 */
export declare function extractToolCalls(output: ScorerRunOutputForAgent): {
    tools: string[];
    toolCallInfos: ToolCallInfo[];
};
/**
 * Extracts text content from all input messages.
 *
 * @param runInput - The scorer run input
 * @returns An array of text strings from each input message
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const messages = extractInputMessages(run.input);
 *     return { allUserMessages: messages.join('\n') };
 *   });
 * ```
 */
export declare const extractInputMessages: (runInput: ScorerRunInputForAgent | undefined) => string[];
/**
 * Extracts text content from all assistant response messages.
 *
 * Filters for messages with role 'assistant' and extracts their text content.
 *
 * @param runOutput - The scorer run output (array of MastraDBMessage)
 * @returns An array of text strings from each assistant message
 *
 * @example
 * ```ts
 * const scorer = createScorer({ ... })
 *   .preprocess(({ run }) => {
 *     const responses = extractAgentResponseMessages(run.output);
 *     return { allResponses: responses.join('\n') };
 *   });
 * ```
 */
export declare const extractAgentResponseMessages: (runOutput: ScorerRunOutputForAgent) => string[];
/**
 * Information about a tool result extracted from scorer output.
 */
export type ToolResultInfo = {
    /** Name of the tool that was called */
    toolName: string;
    /** Unique identifier for the tool call */
    toolCallId: string;
    /** Arguments passed to the tool */
    args: Record<string, any>;
    /** Result returned by the tool */
    result: any;
};
/**
 * Extracts tool results from a scorer run output.
 *
 * Returns structured objects that can be used with the hallucination scorer's
 * `getContext` hook or for other scorer logic.
 *
 * @param output - The scorer run output (array of MastraDBMessage)
 * @returns An array of ToolResultInfo objects
 *
 * @example
 * ```ts
 * import { extractToolResults } from '@mastra/evals/scorers';
 * import { createHallucinationScorer } from '@mastra/evals/scorers/prebuilt';
 *
 * const scorer = createHallucinationScorer({
 *   model: openai('gpt-4o'),
 *   options: {
 *     getContext: (run) => {
 *       const toolResults = extractToolResults(run.output);
 *       return toolResults.map(t => JSON.stringify({ tool: t.toolName, result: t.result }));
 *     },
 *   },
 * });
 * ```
 */
export declare function extractToolResults(output: ScorerRunOutputForAgent): ToolResultInfo[];
export { extractTrajectory } from '@mastra/core/evals';
/**
 * Compares two trajectories and returns detailed comparison results.
 *
 * This is the core comparison logic used by trajectory scorers. It supports
 * strict and non-strict ordering, optional step data comparison, and loop detection.
 *
 * @param actual - The trajectory the agent actually took
 * @param expected - The expected trajectory to compare against
 * @param options - Comparison configuration options
 * @returns Detailed comparison results including match scores and diagnostics
 *
 * @example
 * ```ts
 * const result = compareTrajectories(
 *   { steps: [{ stepType: 'tool_call', name: 'search' }, { stepType: 'tool_call', name: 'summarize' }] },
 *   { steps: [{ stepType: 'tool_call', name: 'search' }, { stepType: 'tool_call', name: 'summarize' }] },
 *   { ordering: 'strict' }
 * );
 * // result.score = 1.0
 * ```
 */
export declare function compareTrajectories(actual: Trajectory, expected: Trajectory | {
    steps: ExpectedStep[];
}, options?: {
    ordering?: 'strict' | 'relaxed' | 'unordered';
    allowRepeatedSteps?: boolean;
}): TrajectoryComparisonResult;
/**
 * Result of comparing two trajectories.
 */
export type TrajectoryComparisonResult = {
    /** Overall match score from 0 to 1 */
    score: number;
    /** Number of expected steps that were matched */
    matchedSteps: number;
    /** Total number of expected steps */
    totalExpectedSteps: number;
    /** Total number of actual steps taken */
    totalActualSteps: number;
    /** Expected steps that were not found in the actual trajectory */
    missingSteps: string[];
    /** Actual steps that were not in the expected trajectory */
    extraSteps: string[];
    /** Steps that appear but not in the expected position */
    outOfOrderSteps: string[];
    /** Steps that were repeated (appeared more than once) */
    repeatedSteps: string[];
};
/**
 * Result of checking trajectory efficiency.
 */
export type TrajectoryEfficiencyResult = {
    /** Overall efficiency score from 0 to 1 */
    score: number;
    /** Total number of steps taken */
    totalSteps: number;
    /** Whether the step budget was exceeded */
    overStepBudget: boolean;
    /** Total tokens used across model_generation steps */
    totalTokens: number;
    /** Whether the token budget was exceeded */
    overTokenBudget: boolean;
    /** Total duration in milliseconds */
    totalDurationMs: number;
    /** Whether the duration budget was exceeded */
    overDurationBudget: boolean;
    /** Redundant calls detected (same tool + same args consecutively) */
    redundantCalls: Array<{
        name: string;
        index: number;
    }>;
};
/**
 * Evaluate trajectory efficiency against budgets and redundancy checks.
 */
export declare function checkTrajectoryEfficiency(trajectory: Trajectory, options?: {
    maxSteps?: number;
    maxTotalTokens?: number;
    maxTotalDurationMs?: number;
    noRedundantCalls?: boolean;
}): TrajectoryEfficiencyResult;
/**
 * Result of checking trajectory against a blacklist.
 */
export type TrajectoryBlacklistResult = {
    /** Score: 1.0 if clean, 0.0 if any violation found */
    score: number;
    /** Individual blacklisted tools that were found */
    violatedTools: string[];
    /** Blacklisted sequences that were found */
    violatedSequences: string[][];
};
/**
 * Check if a trajectory violates any blacklist rules.
 * Returns score 0.0 if any violation is found (hard fail).
 */
export declare function checkTrajectoryBlacklist(trajectory: Trajectory, options?: {
    blacklistedTools?: string[];
    blacklistedSequences?: string[][];
}): TrajectoryBlacklistResult;
/**
 * A detected tool failure pattern in the trajectory.
 */
export type ToolFailurePattern = {
    /** The tool name that experienced failure */
    toolName: string;
    /** Number of consecutive retries (same tool, same or similar args) */
    retryCount: number;
    /** Whether the agent fell back to a different tool after failures */
    fellBackToAlternative: boolean;
    /** The alternative tool used, if any */
    alternativeTool?: string;
    /** Whether any retry eventually succeeded */
    eventuallySucceeded: boolean;
};
/**
 * Result of analyzing tool failure patterns in a trajectory.
 */
export type ToolFailureAnalysisResult = {
    /** Score from 0 to 1 (lower = more failures/retries) */
    score: number;
    /** Tool failure patterns detected */
    patterns: ToolFailurePattern[];
    /** Total number of retries across all tools */
    totalRetries: number;
    /** Tools that exceeded the retry threshold */
    excessiveRetryTools: string[];
};
/**
 * Analyze tool failure and retry patterns in a trajectory.
 */
export declare function analyzeToolFailures(trajectory: Trajectory, options?: {
    maxRetriesPerTool?: number;
}): ToolFailureAnalysisResult;
//# sourceMappingURL=utils.d.ts.map