import { executeToolPlan } from '../agent-context/runner.js';
import { type AgentContextToolRunner } from '../agent-context/tool-runner.js';
import { type WorkflowReplayResult } from './runner.js';
import {
  loadSubagentLiveCaseResult,
  type SubagentLiveResult,
  type TelemetryStep,
} from './subagent-live.js';
import type {
  AgentSafeBenchmarkCase,
  AgentSafeBenchmarkSuite,
  AgentSafeCaseKey,
  SemanticTuple,
} from './types.js';

/*
 * NOTE(review): the reviewed copy of these declarations had lost every generic
 * type argument (bare `Record` / `Promise`), which TypeScript rejects with
 * TS2314. The arguments below were reconstructed:
 *   - where this file offers evidence (an imported or declared type that is
 *     otherwise unused), that type was restored;
 *   - where it offers none, `unknown` is used so nothing is asserted that the
 *     file cannot support.
 * Confirm every reconstructed argument against the implementation module, or
 * regenerate this file from it.
 */

/** Local alias for the benchmark case key; used as the key type of the per-case records below. */
type CaseKey = AgentSafeCaseKey;

/** Shape of one case's result in the `same_script_full` / `same_script_slim` records. */
export interface SameScriptCaseResult {
  /** Tool plan, typed identically to the `tool_plan` field of a benchmark case. */
  tool_plan: AgentSafeBenchmarkCase['tool_plan'];
  /** Telemetry steps associated with this result. */
  steps: TelemetryStep[];
  /** Semantic tuple associated with this result. */
  semantic_tuple: SemanticTuple;
  /** Pass/fail flag for the semantic tuple; the comparison rule is not visible in this file. */
  semantic_tuple_pass: boolean;
  /** Numeric tool-call count; how it is measured is not visible in this file. */
  tool_calls_to_completion: number;
  /** Numeric token count; how it is measured is not visible in this file. */
  tokens_to_completion: number;
}

/** Report object accepted by `writeAgentSafeQueryContextReports`. */
export interface AgentSafeQueryContextBenchmarkReport {
  /** Generation timestamp as a string; the exact format is not visible in this file. */
  generatedAt: string;
  /** NOTE(review): value type inferred from the otherwise-unused `WorkflowReplayResult` import. */
  workflow_replay_full: Record<CaseKey, WorkflowReplayResult>;
  /** NOTE(review): value type inferred from the otherwise-unused `WorkflowReplayResult` import. */
  workflow_replay_slim: Record<CaseKey, WorkflowReplayResult>;
  /** NOTE(review): value type inferred from the otherwise-unused `SameScriptCaseResult` interface. */
  same_script_full: Record<CaseKey, SameScriptCaseResult>;
  /** NOTE(review): value type inferred from the otherwise-unused `SameScriptCaseResult` interface. */
  same_script_slim: Record<CaseKey, SameScriptCaseResult>;
  /** NOTE(review): value type inferred from the otherwise-unused `SubagentLiveResult` import. */
  subagent_live: Record<CaseKey, SubagentLiveResult>;
  acceptance: {
    pass: boolean;
    /** NOTE(review): per-case value type could not be recovered; narrow before use. */
    cases: Record<CaseKey, unknown>;
  };
  /** Top-level pass/fail flag. */
  pass: boolean;
  /** NOTE(review): per-case value type could not be recovered; narrow before use. */
  cases: Record<CaseKey, unknown>;
  same_script: {
    /** NOTE(review): value type presumed to match `SameScriptCaseResult.tool_plan` — confirm. */
    tool_plan: Record<CaseKey, AgentSafeBenchmarkCase['tool_plan']>;
    /** NOTE(review): per-case value type could not be recovered; narrow before use. */
    cases: Record<CaseKey, unknown>;
  };
  semantic_equivalence: {
    pass: boolean;
    /** NOTE(review): per-case value type could not be recovered; narrow before use. */
    cases: Record<CaseKey, unknown>;
  };
  /** NOTE(review): neither key nor value type could be recovered; the widest safe form is used. */
  token_summary: Record<string, unknown>;
  /** NOTE(review): neither key nor value type could be recovered; the widest safe form is used. */
  call_summary: Record<string, unknown>;
}

/**
 * Runs the agent-safe query-context benchmark for a suite.
 *
 * @param suite - Benchmark suite to run.
 * @param options - Run options; both `repo` and `subagentRunsDir` are optional strings.
 * @param deps - Optional overrides typed after `AgentContextToolRunner`,
 *   `executeToolPlan` and `loadSubagentLiveCaseResult`; presumably substitutes
 *   for the imported implementations (e.g. in tests) — confirm.
 * @returns NOTE(review): resolved type reconstructed as the report interface
 *   declared in this file — confirm against the implementation.
 */
export declare function runAgentSafeQueryContextBenchmark(
  suite: AgentSafeBenchmarkSuite,
  options: {
    repo?: string;
    subagentRunsDir?: string;
  },
  deps?: {
    runner?: AgentContextToolRunner;
    executeToolPlan?: typeof executeToolPlan;
    loadSubagentLiveCaseResult?: typeof loadSubagentLiveCaseResult;
  },
): Promise<AgentSafeQueryContextBenchmarkReport>;

/**
 * Writes report output for a benchmark report.
 *
 * @param reportDir - Directory path argument; presumably where the report files are written — confirm.
 * @param report - Report to write.
 * @returns NOTE(review): resolved type could not be recovered, so `unknown` is
 *   used; tighten (e.g. to `void`) once confirmed against the implementation.
 */
export declare function writeAgentSafeQueryContextReports(
  reportDir: string,
  report: AgentSafeQueryContextBenchmarkReport,
): Promise<unknown>;

export {};