/**
 * scorer.ts — Two-layer scoring engine for the CareerVivid Agent Eval Framework.
 *
 * Layer 1 — Deterministic (instant, free):
 *   - Tool Invocation Accuracy (TIA): expected vs actual tool calls
 *   - Latency Score (LAT): bucketed score that decreases as latency (ms) increases
 *   - Keyword bonus/malus applied on top of LLM IRA score
 *
 * Layer 2 — LLM-as-Judge (cheap, ~0.01 credit per test):
 *   - Calls Gemini with a structured rubric prompt
 *   - Scores IRA, RQ, CR, RC, HR on 0–10 scales
 *   - Returns a one-sentence rationale per dimension
 *
 * When --no-judge is set, qualitative dims default to 5.0 (neutral baseline)
 * and only deterministic dims are computed.
 */
import type { TestCase, DimensionScores, EvalResult } from "./types.js";
/**
 * Convert response latency to a 0–10 score.
 * Very fast responses (< 2s) get a perfect 10; anything ≥ 30s gets 0.
 *
 * NOTE(review): the scores assigned between 2s and 30s are defined by the
 * implementation and are not visible in this declaration.
 *
 * @param ms - Response latency in milliseconds.
 * @returns The latency score on a 0–10 scale (higher means faster).
 */
export declare function latencyToScore(ms: number): number;
/**
 * Score tool invocation accuracy by comparing expected vs actual tool calls.
 *
 * Scoring rationale:
 *  - All expected tools called → 10
 *  - Partial → proportional credit (floor 2 per expected tool)
 *  - Zero expected tools → 10 (not applicable)
 *  - Forbidden tool called → penalty applied (−2 per forbidden tool)
 *
 * NOTE(review): the exact meaning of "floor 2 per expected tool" and whether
 * the result is clamped after forbidden-tool penalties are not visible in this
 * declaration — confirm against the implementation.
 *
 * @param expectedTools - Names of the tools the test expects the agent to call.
 * @param forbiddenTools - Names of tools that incur a penalty when called.
 * @param actualTools - Names of the tools the agent actually called.
 * @returns The tool invocation accuracy (TIA) score; 10 is the best score.
 */
export declare function scoreTia(expectedTools: string[], forbiddenTools: string[], actualTools: string[]): number;
/**
 * Returns true when a test required specific tools but NONE of them were called.
 * Used to apply a hard composite cap in the scorer — bypassing the 5.0 neutral
 * heuristics baseline that would otherwise mask the tool-invocation failure.
 *
 * NOTE(review): presumably returns false when `expectedTools` is empty (no
 * tools were required) — confirm against the implementation.
 *
 * @param expectedTools - Names of the tools the test requires.
 * @param actualTools - Names of the tools the agent actually called.
 * @returns `true` if tools were required and none of them were called.
 */
export declare function didMissAllRequiredTools(expectedTools: string[], actualTools: string[]): boolean;
/**
 * Apply a small keyword-based bonus/penalty to an existing IRA score.
 * Missing expected keywords: −0.5 per keyword (max −2)
 * All keywords present: +0.5 bonus (capped at 10)
 *
 * NOTE(review): the matching rules (case sensitivity, substring vs whole word),
 * the behavior for an empty keyword list, and any lower bound on the result
 * are not visible in this declaration — confirm against the implementation.
 *
 * @param iraBase - The existing IRA score to adjust.
 * @param expectedKeywords - Keywords expected to appear in the response.
 * @param responseText - The agent response text searched for the keywords.
 * @returns The adjusted IRA score.
 */
export declare function applyKeywordBonus(iraBase: number, expectedKeywords: string[], responseText: string): number;
/**
 * Compute the composite score from a set of per-dimension scores.
 *
 * NOTE(review): the weighting of each dimension and the range of the result
 * are not visible in this declaration — confirm against the implementation.
 *
 * @param scores - The per-dimension scores to combine.
 * @returns The composite score as a single number.
 */
export declare function computeComposite(scores: DimensionScores): number;
/**
 * Configuration passed to the scorer (see the `scorerOpts` field of `score`).
 */
export interface ScorerOptions {
    /** API key for Gemini — presumably used for the LLM-as-judge call; confirm. */
    geminiApiKey: string;
    /** Identifier of the model used as the judge. */
    judgeModel: string;
    /**
     * Presumably mirrors the `--no-judge` flag: when set, qualitative dims
     * default to 5.0 and only deterministic dims are computed — confirm.
     */
    noJudge: boolean;
}
/**
 * Score a test case result.
 *
 * Accepts the raw agent response, latency, and tool calls, then returns
 * the fully populated DimensionScores + composite + pass/fail + rationale.
 *
 * @param opts - Inputs for scoring a single test case.
 * @param opts.tc - The test case being scored.
 * @param opts.agentResponse - The raw agent response text.
 * @param opts.latencyMs - Response latency in milliseconds.
 * @param opts.toolsCalled - Names of the tools the agent called.
 * @param opts.scorerOpts - Scorer configuration (API key, judge model, no-judge switch).
 * @returns A promise resolving to the `scores`, `composite`, `pass`, and
 *   `judgeRationale` fields of an `EvalResult`.
 */
export declare function score(opts: {
    tc: TestCase;
    agentResponse: string;
    latencyMs: number;
    toolsCalled: string[];
    scorerOpts: ScorerOptions;
}): Promise<Pick<EvalResult, "scores" | "composite" | "pass" | "judgeRationale">>;
//# sourceMappingURL=scorer.d.ts.map