import {
  JsonValue,
  HarnessRun,
  GenAiOperationName,
  ToolCallRecord,
  NormalizedSpanEvent,
  NormalizedSpan,
  NormalizedTrace,
  NormalizedMessage,
  UsageSummary,
  TimingSummary,
  NormalizedSpanAttributes,
  HarnessRunError,
  NormalizedSession,
} from '@vitest-evals/core';
export {
  GenAiOperationName,
  GenAiOutputType,
  GenAiProviderName,
  GenAiSemanticAttributeKey,
  GenAiSemanticAttributes,
  GenAiTokenType,
  GenAiToolType,
  HarnessRun,
  HarnessRunError,
  JsonPrimitive,
  JsonValue,
  NormalizedMessage,
  NormalizedSession,
  NormalizedSpan,
  NormalizedSpanAttributeKey,
  NormalizedSpanAttributes,
  NormalizedSpanEvent,
  NormalizedTrace,
  OpenTelemetrySemanticAttributeKey,
  OpenTelemetrySemanticAttributes,
  TimingSummary,
  ToolCallRecord,
  UsageSummary,
  assistantMessages,
  failedSpans,
  latestAssistantMessageContent,
  messagesByRole,
  spans,
  spansByKind,
  systemMessages,
  toolCalls,
  toolMessages,
  userMessages,
} from '@vitest-evals/core';

// NOTE(review): the generic parameter lists in this file were reconstructed
// after being stripped from the declarations. The following are inferred and
// must be confirmed against `@vitest-evals/core`:
//   - `HarnessRun` is assumed to take a single output type argument.
//   - Output parameters are assumed to be constrained to `JsonValue | undefined`.
//   - "Raw" records are assumed to be `Record<string, unknown>` and
//     normalized records `Record<string, JsonValue>`.

/** Options for converting normalized tool calls into trace spans. */
type CreateToolCallSpansOptions = {
  /** Trace id to attach to each generated tool span. */
  traceId?: string;
  /** Parent span id to attach to each generated tool span. */
  parentId?: string;
  /** Prefix used to create internal span ids instead of reusing tool-call ids. */
  spanIdPrefix?: string;
};

/** Options for attaching a fallback run trace to a harness result. */
type EnsureRunTraceOptions = {
  /** Human-readable run or harness name. */
  name: string;
  /** Wall-clock start time for the harness run. */
  startedAt: Date;
  /** Wall-clock finish time for the harness run. */
  finishedAt: Date;
  /** Optional trace id. A generated id is used when omitted. */
  id?: string;
  /** GenAI operation name to place on the root run span. */
  operationName?: GenAiOperationName;
  /** Optional JSON-safe source marker for the trace metadata. */
  source?: string;
};

/**
 * Makes `output` optional when the output type admits `undefined` and
 * required otherwise.
 */
type OutputField<TOutput> = undefined extends TOutput
  ? {
      output?: TOutput;
    }
  : {
      output: TOutput;
    };

/**
 * Per-run metadata shape accepted by harnesses and eval tests.
 *
 * NOTE(review): value type assumed to be `unknown` — confirm.
 */
type HarnessMetadata = Record<string, unknown>;

/**
 * Runtime context passed from the eval fixture into a harness run.
 *
 * @example
 * ```ts
 * const harness: Harness<string> = {
 *   name: "refund-agent",
 *   async run(input, context) {
 *     context.setArtifact("inputLength", input.length);
 *
 *     return {
 *       output: undefined,
 *       session: { messages: [{ role: "user", content: input }] },
 *       usage: {},
 *       errors: [],
 *     };
 *   },
 * };
 * ```
 */
type HarnessContext<TMetadata extends HarnessMetadata = HarnessMetadata> = {
  /** Per-run metadata passed through `run(input, { metadata })`. */
  metadata: Readonly<TMetadata>;
  /** Abort signal from Vitest when available. */
  signal?: AbortSignal;
  /** Mutable JSON-safe artifact bag shared with the harness. */
  artifacts: Record<string, JsonValue>;
  /** Stores one JSON-safe artifact on the current run. */
  setArtifact: (name: string, value: JsonValue) => void;
};

/**
 * Adapter that executes the system under test and returns a normalized run.
 *
 * @example
 * ```ts
 * const harness: Harness<string> = {
 *   name: "refund-agent",
 *   async run(input, context) {
 *     return normalizeHarnessRun(input, await runRefundFlow(input), context);
 *   },
 * };
 * ```
 */
type Harness<
  TInput = unknown,
  TOutput extends JsonValue | undefined = JsonValue | undefined,
  TMetadata extends HarnessMetadata = HarnessMetadata,
> = {
  /** Stable harness name used in reports. */
  name: string;
  /** Executes the system under test and returns a normalized run. */
  run: (
    input: TInput,
    context: HarnessContext<TMetadata>,
  ) => Promise<HarnessRun<TOutput>>;
};

/** Value or promise accepted by lightweight harness callbacks. */
type MaybePromise<T> = T | Promise<T>;

/** Lightweight tool-call record accepted by `createHarness(...)` results. */
type SimpleToolCallRecord = Omit<
  ToolCallRecord,
  'arguments' | 'result' | 'error' | 'metadata'
> & {
  /** Raw tool arguments accepted by `createHarness(...)` before normalization. */
  arguments?: unknown;
  /** Raw tool result accepted by `createHarness(...)` before normalization. */
  result?: unknown;
  /** Raw tool error accepted by `createHarness(...)` before normalization. */
  error?: unknown;
  /** Raw tool metadata accepted by `createHarness(...)` before normalization. */
  metadata?: Record<string, unknown>;
};

/** Lightweight span event accepted by `createHarness(...)` results. */
type SimpleSpanEvent = Omit<NormalizedSpanEvent, 'attributes'> & {
  /** Raw event attributes accepted by `createHarness(...)` before normalization. */
  attributes?: Record<string, unknown>;
};

/** Lightweight span record accepted by `createHarness(...)` results. */
type SimpleSpanRecord = Omit<NormalizedSpan, 'attributes' | 'error' | 'events'> & {
  /** Raw span attributes accepted by `createHarness(...)` before normalization. */
  attributes?: Record<string, unknown>;
  /** Raw span error accepted by `createHarness(...)` before normalization. */
  error?: unknown;
  /** Raw span events accepted by `createHarness(...)` before normalization. */
  events?: SimpleSpanEvent[];
};

/** Lightweight trace record accepted by `createHarness(...)` results. */
type SimpleTraceRecord = Omit<NormalizedTrace, 'metadata' | 'spans'> & {
  /** Raw trace metadata accepted by `createHarness(...)` before normalization. */
  metadata?: Record<string, unknown>;
  /** Lightweight spans to normalize into the trace. */
  spans: SimpleSpanRecord[];
};

/**
 * Lightweight result shape normalized by `createHarness(...)`.
 *
 * @example
 * ```ts
 * const result: SimpleHarnessResult<{ status: "approved" }> = {
 *   output: { status: "approved" },
 *   toolCalls: [{ name: "lookupInvoice", arguments: { invoiceId: "inv_123" } }],
 *   usage: { totalTokens: 260 },
 * };
 * ```
 */
type SimpleHarnessResult<
  TOutput extends JsonValue | undefined = JsonValue | undefined,
> = OutputField<TOutput> & {
  /** Pre-normalized transcript messages. When omitted, a default user/assistant transcript is created. */
  messages?: NormalizedMessage[];
  /** Lightweight tool-call records to normalize into the session. */
  toolCalls?: SimpleToolCallRecord[];
  /** Usage summary to attach to the run. */
  usage?: UsageSummary;
  /** Timing summary to attach to the run. */
  timings?: TimingSummary;
  /** Raw artifact values to normalize and merge into the run. */
  artifacts?: Record<string, unknown>;
  /** Lightweight traces and spans to normalize into the run. */
  traces?: SimpleTraceRecord[];
  /** Raw session metadata to normalize into the session. */
  metadata?: Record<string, unknown>;
  /** Raw errors to normalize into the run. */
  errors?: unknown[];
};

/** Either a complete normalized run or a lightweight result to normalize. */
type HarnessResultLike<
  TOutput extends JsonValue | undefined = JsonValue | undefined,
> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;

/** Arguments passed to the `createHarness(...)` convenience callback. */
type CreateHarnessRunArgs<
  TInput = unknown,
  TMetadata extends HarnessMetadata = HarnessMetadata,
> = {
  /** Original input passed to `run(input)`. */
  input: TInput;
  /** Read-only metadata passed to `run(input, { metadata })`. */
  metadata: Readonly<TMetadata>;
  /** Abort signal from Vitest when available. */
  signal?: AbortSignal;
  /** Mutable run artifact bag. */
  artifacts: HarnessContext['artifacts'];
  /** Stores one JSON-safe artifact on the current run. */
  setArtifact: HarnessContext['setArtifact'];
};

/**
 * Options for creating a lightweight custom application harness.
 *
 * @example
 * ```ts
 * const options: CreateHarnessOptions<string> = {
 *   name: "refund-agent",
 *   run: async ({ input }) => ({
 *     output: await classifyRefund(input),
 *   }),
 * };
 * ```
 */
type CreateHarnessOptions<
  TInput = unknown,
  TOutput extends JsonValue | undefined = JsonValue | undefined,
  TMetadata extends HarnessMetadata = HarnessMetadata,
> = {
  /** Stable harness name used in reports. */
  name: string;
  /** Executes application code and returns either a lightweight result or full `HarnessRun`. */
  run: (
    args: CreateHarnessRunArgs<TInput, TMetadata>,
  ) => MaybePromise<HarnessResultLike<TOutput>>;
};

/** Returns true when a value exposes a callable method with the given name. */
declare function hasCallableMethod(value: unknown, methodName: string): boolean;

/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */
declare function toJsonValue(value: unknown): JsonValue | undefined;

/** Drops non-JSON properties from a record while preserving valid values. */
declare function normalizeRecord(
  value: Record<string, unknown>,
): Record<string, JsonValue>;

/** Normalizes metadata and omits the field entirely when nothing survives. */
declare function normalizeMetadata(
  value: Record<string, unknown>,
): Record<string, JsonValue> | undefined;

/** Converts arbitrary content into the JSON-safe message content shape. */
declare function normalizeContent(value: unknown): JsonValue;

/**
 * Creates a harness from the common "run app code and return output" shape.
 *
 * @param options - Harness name plus the callback that executes app code.
 *
 * @example
 * ```ts
 * import { createHarness } from "vitest-evals";
 *
 * export const refundHarness = createHarness<
 *   string,
 *   { status: "approved" | "denied" },
 *   { expected: { status: "approved" | "denied" } }
 * >({
 *   name: "refund-agent",
 *   run: async ({ input, metadata, setArtifact }) => {
 *     const result = await runRefundFlow(input, metadata);
 *     const output = { status: result.status };
 *
 *     setArtifact("case", { expected: metadata.expected.status });
 *
 *     return {
 *       output,
 *       toolCalls: result.toolCalls,
 *       usage: { provider: "openai", model: "gpt-4o-mini" },
 *     };
 *   },
 * });
 * ```
 */
declare function createHarness<
  TInput = unknown,
  TOutput extends JsonValue | undefined = JsonValue | undefined,
  TMetadata extends HarnessMetadata = HarnessMetadata,
>(
  options: CreateHarnessOptions<TInput, TOutput, TMetadata>,
): Harness<TInput, TOutput, TMetadata>;

/**
 * Normalizes a lightweight harness result into the reporter-facing run shape.
 *
 * @param input - Original input passed to the harness.
 * @param result - Lightweight result or pre-normalized harness run.
 * @param context - Optional per-run context used to merge artifacts.
 *
 * @example
 * ```ts
 * const run = normalizeHarnessRun("Refund invoice inv_123", {
 *   output: { status: "approved" },
 *   toolCalls: [{ name: "lookupInvoice", arguments: { invoiceId: "inv_123" } }],
 *   usage: { provider: "openai", model: "gpt-4o-mini" },
 * });
 *
 * expect(toolCalls(run.session)).toHaveLength(1);
 * ```
 */
declare function normalizeHarnessRun<
  TInput = unknown,
  TOutput extends JsonValue | undefined = JsonValue | undefined,
  TMetadata extends HarnessMetadata = HarnessMetadata,
>(
  input: TInput,
  result: HarnessResultLike<TOutput>,
  context?: HarnessContext<TMetadata>,
): HarnessRun<TOutput>;

/**
 * Builds a JSON-safe failed run for errors that happen before a harness can return.
 *
 * @param input - Original input passed to the harness.
 * @param error - Error thrown by setup or execution.
 * @param options - Optional artifacts to preserve on the failed run.
 */
declare function createFailedHarnessRun(
  input: unknown,
  error: unknown,
  options?: {
    // NOTE(review): value type assumed to match `HarnessContext["artifacts"]` — confirm.
    artifacts?: Record<string, JsonValue>;
  },
): HarnessRun;

/** Normalizes arbitrary span errors while preserving object-shaped messages. */
declare function normalizeSpanError(
  error: unknown,
): NormalizedSpan['error'] | undefined;

/** Normalizes raw span attributes into the JSON-safe span attribute shape. */
declare function normalizeSpanAttributes(
  attributes: Record<string, unknown>,
): NormalizedSpanAttributes | undefined;

/** Builds common OpenTelemetry GenAI usage attributes from a usage summary. */
declare function createGenAiUsageAttributes(
  usage: UsageSummary | undefined,
  options?: {
    provider?: string;
  },
): {
  'gen_ai.provider.name': string | undefined;
  'gen_ai.request.model': string | undefined;
  'gen_ai.response.model': string | undefined;
  'gen_ai.usage.input_tokens': number | undefined;
  'gen_ai.usage.output_tokens': number | undefined;
  'gen_ai.usage.reasoning.output_tokens': number | undefined;
};

/**
 * Converts normalized tool-call records into trace spans.
 *
 * Tool-call ids are preserved as GenAI attributes. Pass `spanIdPrefix` when the
 * spans belong to a known trace so span ids stay internally unique.
 */
declare function createToolCallSpans(
  calls: ToolCallRecord[],
  options?: CreateToolCallSpansOptions,
): NormalizedSpan[];

/**
 * Attaches a fallback run trace when a harness result does not already contain spans.
 *
 * This keeps custom harnesses inspectable while first-party harness packages
 * remain free to attach richer native traces.
 */
declare function ensureRunTrace(
  run: HarnessRun,
  options: EnsureRunTraceOptions,
): NormalizedTrace | undefined;

/**
 * Attaches a partial or complete harness run to an arbitrary thrown error.
 *
 * @param error - Thrown value to wrap.
 * @param run - Partial or complete normalized harness run to preserve.
 *
 * @example
 * ```ts
 * try {
 *   return await runAgent(input);
 * } catch (error) {
 *   throw attachHarnessRunToError(error, partialRun);
 * }
 * ```
 */
declare function attachHarnessRunToError(
  error: unknown,
  run: HarnessRun,
): HarnessRunError;

/**
 * Reads an attached harness run back off a previously wrapped error value.
 *
 * @param error - Unknown thrown value that may contain a harness run.
 *
 * @example
 * ```ts
 * const partialRun = getHarnessRunFromError(error);
 *
 * if (partialRun) {
 *   console.log(toolCalls(partialRun.session));
 * }
 * ```
 */
declare function getHarnessRunFromError(error: unknown): HarnessRun | undefined;

/** Returns true when a value matches the normalized `HarnessRun` contract. */
declare function isHarnessRun(value: unknown): value is HarnessRun;

/** Returns true when a value matches the normalized session contract. */
declare function isNormalizedSession(value: unknown): value is NormalizedSession;

/** Reuses pre-normalized harness errors when a runtime already returns them. */
declare function resolveHarnessRunErrors(
  result: unknown,
): Array<Record<string, JsonValue>>;

/** Serializes an arbitrary thrown value into the normalized error shape. */
declare function serializeError(error: unknown): Record<string, JsonValue>;

export {
  type CreateHarnessOptions,
  type CreateHarnessRunArgs,
  type CreateToolCallSpansOptions,
  type EnsureRunTraceOptions,
  type Harness,
  type HarnessContext,
  type HarnessMetadata,
  type HarnessResultLike,
  type MaybePromise,
  type SimpleHarnessResult,
  type SimpleSpanEvent,
  type SimpleSpanRecord,
  type SimpleToolCallRecord,
  type SimpleTraceRecord,
  attachHarnessRunToError,
  createFailedHarnessRun,
  createGenAiUsageAttributes,
  createHarness,
  createToolCallSpans,
  ensureRunTrace,
  getHarnessRunFromError,
  hasCallableMethod,
  isHarnessRun,
  isNormalizedSession,
  normalizeContent,
  normalizeHarnessRun,
  normalizeMetadata,
  normalizeRecord,
  normalizeSpanAttributes,
  normalizeSpanError,
  resolveHarnessRunErrors,
  serializeError,
  toJsonValue,
};