/**
 * runner.ts — AgentEvalRunner: drives the agent under test programmatically.
 *
 * Key design decisions:
 *
 * 1. ISOLATED HISTORY: Each test case gets a fresh QueryEngine instance, so
 *    conversation history never bleeds between test cases.
 *
 * 2. SAFE TOOL EXECUTION DURING EVAL:
 *    - READ tools: auto-approved (tracker_list_jobs, get_resume, search_jobs, etc.)
 *    - WRITE tools (tracker_add_job, tracker_update_job): auto-denied by default.
 *      Tests marked write-op use a TEMP COPY of jobs.csv so they can test
 *      write operations safely without modifying the real CSV.
 *
 * 3. LATENCY MEASUREMENT: wall-clock time summed across all turns for a test.
 *
 * 4. TOOL TRACKING: the `toolsCalled` list is populated via the `onToolCall` hook,
 *    then used by the scorer for deterministic TIA scoring.
 *
 * 5. TIMEOUT: each test case has a configurable timeout (default 120s).
 *    On timeout, the test is scored as a FAIL with latencyScore=0.
 */
import type { IDataLogger } from "./storage/IDataLogger.js";
import type { TestCase, EvalResult, RunSummary, RunnerOptions } from "./types.js";
/**
 * Ambient declaration of the eval runner that drives the agent under test.
 *
 * This is a type-only declaration: the implementation is not visible here,
 * so the notes below describe the declared surface (names, parameters,
 * return types) and hedge anything that depends on the implementation.
 */
export declare class AgentEvalRunner {
    /**
     * Identifier of this run. Declared without a type in this file.
     * NOTE(review): how and when it is generated is not visible here.
     */
    private readonly runId;
    /** Runner options. NOTE(review): presumably the `opts` constructor argument — confirm. */
    private readonly opts;
    /** Data logger. NOTE(review): presumably the `logger` constructor argument — confirm. */
    private readonly logger;
    /**
     * Creates a runner.
     *
     * @param logger - Logger implementing `IDataLogger`.
     * @param opts - Runner configuration (`RunnerOptions`).
     */
    constructor(logger: IDataLogger, opts: RunnerOptions);
    /**
     * Run a specific list of TestCases and return the run summary.
     *
     * @param tests - Test cases to run.
     * @param suiteName - Name of the suite being run.
     * @returns A promise resolving to the `RunSummary` for this run.
     */
    runSuite(tests: TestCase[], suiteName: string): Promise<RunSummary>;
    /**
     * Run a single TestCase. Handles write-op isolation automatically.
     *
     * @param tc - Test case to run.
     * @param suite - Name of the suite the test case belongs to.
     * @param current - NOTE(review): presumably the position of this test
     *   within the run (for progress reporting) — confirm, including
     *   whether it is 0- or 1-based.
     * @param total - NOTE(review): presumably the number of tests in the
     *   run — confirm.
     * @returns A promise resolving to the `EvalResult` for this test case.
     */
    runTest(tc: TestCase, suite: string, current: number, total: number): Promise<EvalResult>;
    // The private members below are emitted without signatures in this
    // declaration file; their parameters and return types are only
    // visible in the implementation.
    /** NOTE(review): presumably performs the actual run of one test case — confirm. */
    private executeTest;
    /** NOTE(review): presumably builds the result recorded for a failed or errored test — confirm. */
    private makeErrorResult;
    /** NOTE(review): presumably aggregates per-test results into the run summary — confirm. */
    private buildSummary;
    /** NOTE(review): presumably writes the run summary to the console or log — confirm. */
    private printSummary;
}
//# sourceMappingURL=runner.d.ts.map