/**
 * Dual Tournament Engine
 *
 * Shared scoring/ranking utilities used by dual tournaments in both /upgrade and /attack flows.
 * Encodes policy vs evaluator tournaments with human-like reward heuristics and multi-evaluator
 * aggregation to approximate human code review preferences.
 */
/**
 * Identifier of a tournament policy.
 *
 * A plain `string` alias; it is the type of `TournamentPolicy.id` and of
 * `TournamentCandidate.policyId`.
 */
export type PolicyId = string;
/**
 * Describes a policy that can take part in a tournament.
 *
 * Only `id` is required; every other field is optional.
 */
export interface TournamentPolicy {
    /** Identifier of this policy. */
    id: PolicyId;
    /** Optional display label. */
    label?: string;
    /** Optional category of the policy, restricted to the four literals below. */
    kind?: 'primary' | 'refiner' | 'checkpoint' | 'attack';
    /** Optional rating. NOTE(review): presumably an Elo-style rating; scale and default are not visible here. */
    elo?: number;
}
/**
 * Describes an evaluator whose rankings can be combined in a tournament.
 *
 * Only `id` is required; every other field is optional.
 */
export interface TournamentEvaluator {
    /** Identifier of this evaluator. */
    id: string;
    /** Optional display label. */
    label?: string;
    /** Weight used when combining evaluator rankings (can be influenced by historical ELO). */
    weight?: number;
    /** Optional category of the evaluator, restricted to the three literals below. */
    kind?: 'hard' | 'soft' | 'hybrid';
    /** Optional rating. NOTE(review): presumably an Elo-style rating; scale and default are not visible here. */
    elo?: number;
}
/**
 * The task a tournament is run for.
 *
 * `id` and `goal` are required; the remaining fields are optional context.
 */
export interface TournamentTask {
    /** Identifier of the task. */
    id: string;
    /** Textual statement of what the task should achieve. */
    goal: string;
    /** Optional repository snapshot. NOTE(review): format (hash, path, serialized tree, ...) is not specified here. */
    repoSnapshot?: string;
    /** Optional list of tests, as strings. NOTE(review): whether these are names, paths, or commands is not specified here. */
    tests?: string[];
    /** Optional list of constraints, as strings. */
    constraints?: string[];
    /** Optional free-form metadata; values are `unknown` and must be narrowed before use. */
    metadata?: Record<string, unknown>;
}
/**
 * Optional numeric metrics attached to a tournament candidate.
 *
 * Every field is an optional `number`. This declaration does not state units
 * or valid ranges (e.g. whether a value is a count, a 0-1 ratio, or a raw
 * measurement), nor how a missing value is treated.
 * NOTE(review): confirm both against the implementation before relying on them.
 */
export interface CandidateMetrics {
    /** Execution success metric. NOTE(review): range not declared here. */
    executionSuccess?: number;
    /** Passed-tests metric. NOTE(review): presumably a count; confirm. */
    testsPassed?: number;
    /** Failed-tests metric. NOTE(review): presumably a count; confirm. */
    testsFailed?: number;
    /** Static-analysis metric. NOTE(review): scale and direction (higher = better?) not declared here. */
    staticAnalysis?: number;
    /** Code-quality metric. NOTE(review): scale not declared here. */
    codeQuality?: number;
    /** Blast-radius metric. NOTE(review): scale and direction not declared here. */
    blastRadius?: number;
    /** Diff-size metric; `TournamentOptions.preferSmallerDiff` refers to this field. */
    diffSize?: number;
    /** Complexity-delta metric. NOTE(review): sign convention not declared here. */
    complexityDelta?: number;
    /** Added-dependencies metric. NOTE(review): presumably a count; confirm. */
    dependenciesAdded?: number;
    /** Speed bonus metric. NOTE(review): scale not declared here. */
    speedBonus?: number;
    /** Tool-success metric. NOTE(review): presumably a count; confirm. */
    toolSuccesses?: number;
    /** Tool-failure metric. NOTE(review): presumably a count; confirm. */
    toolFailures?: number;
    /** Warnings metric. NOTE(review): presumably a count; confirm. */
    warnings?: number;
}
/**
 * Optional preference-style signals attached to a tournament candidate.
 *
 * Each field is an optional number documented as lying in the range 0-1.
 * The type itself is plain `number`, so the range is not enforced by the
 * compiler.
 */
export interface CandidateSignals {
    /** Learned reward model / preference score (0-1) */
    rewardModelScore?: number;
    /** Self-assessed confidence from the agent (0-1) */
    selfAssessment?: number;
    /** Optional human preference label (0-1) */
    humanPreference?: number;
}
/**
 * A single score given by one evaluator.
 *
 * `evaluatorId` and `score` are required; `weight` and `notes` are optional.
 */
export interface EvaluatorScore {
    /** Identifier of the evaluator that produced the score. NOTE(review): presumably matches a `TournamentEvaluator.id`; confirm. */
    evaluatorId: string;
    /** The score value. NOTE(review): range and direction are not declared here. */
    score: number;
    /** Optional weight for this score. NOTE(review): precedence versus `TournamentEvaluator.weight` is not visible here. */
    weight?: number;
    /** Optional free-text notes accompanying the score. */
    notes?: string;
}
/**
 * A candidate entered into a tournament.
 *
 * `id` and `policyId` are required; summaries, metrics, signals, evaluator
 * scores, and raw output are all optional.
 */
export interface TournamentCandidate {
    /** Identifier of the candidate. */
    id: string;
    /** Identifier of the policy associated with this candidate. */
    policyId: PolicyId;
    /** Optional textual summary of the patch. */
    patchSummary?: string;
    /** Optional textual summary of the diff. */
    diffSummary?: string;
    /** Optional numeric metrics for this candidate. */
    metrics?: CandidateMetrics;
    /** Optional preference-style signals for this candidate. */
    signals?: CandidateSignals;
    /** Optional per-evaluator scores for this candidate. */
    evaluatorScores?: EvaluatorScore[];
    /** Optional raw output string. NOTE(review): producer and format are not specified here. */
    rawOutput?: string;
}
/**
 * Weights for the three components of the human-like reward.
 *
 * All three fields are required. This declaration does not show how the
 * weights are combined or whether they are normalized.
 * NOTE(review): confirm whether they are expected to sum to 1.
 */
export interface HumanRewardWeights {
    /** Correctness weight */
    alpha: number;
    /** Code quality / robustness weight */
    beta: number;
    /** Learned reward / human preference weight */
    gamma: number;
}
/**
 * Default reward weights exported by the module.
 *
 * The concrete `alpha` / `beta` / `gamma` values live in the implementation
 * and are not visible in this declaration.
 * NOTE(review): presumably used when `TournamentOptions.rewardWeights` is omitted; confirm.
 */
export declare const DEFAULT_HUMAN_REWARD_WEIGHTS: HumanRewardWeights;
/**
 * One entry of a tournament ranking: a candidate together with its
 * aggregate score, rank, and per-component scores.
 *
 * All fields are required.
 */
export interface RankedCandidate {
    /** Identifier of the ranked candidate. NOTE(review): presumably a `TournamentCandidate.id`; confirm. */
    candidateId: string;
    /** Overall score for the candidate. NOTE(review): the combining formula is not visible here. */
    aggregateScore: number;
    /** Relative human-like accuracy (1 = best rank, 0 = worst rank) */
    humanAccuracy: number;
    /** Position in the ranking. NOTE(review): presumably 1-based with 1 = best; confirm. */
    rank: number;
    /** Correctness component of the score. */
    correctnessScore: number;
    /** Quality component of the score. */
    qualityScore: number;
    /** Learned-reward component of the score. */
    learnedScore: number;
    /** Evaluator component of the score. */
    evaluatorScore: number;
}
/**
 * Two-level string-keyed table of numbers.
 *
 * NOTE(review): presumably `wins[candidateId][opponentId]` is how often
 * `candidateId` beat `opponentId`; key meaning and whether self-pairs are
 * present must be confirmed against the implementation.
 */
export type PairwiseWins = Record<string, Record<string, number>>;
/**
 * Result returned by `runDualTournament`.
 *
 * All fields are required.
 */
export interface TournamentOutcome {
    /** The task the tournament was run for. */
    task: TournamentTask;
    /** The ranked candidates. NOTE(review): presumably ordered best-first; confirm. */
    ranked: RankedCandidate[];
    /** Pairwise win table for the candidates. */
    pairwise: PairwiseWins;
    /** Evaluator scores grouped under string keys. NOTE(review): presumably keyed by candidate id; confirm. */
    evaluatorBreakdown: Record<string, EvaluatorScore[]>;
}
/**
 * Optional settings accepted by `runDualTournament`.
 *
 * Every field is optional. The defaults applied when a field is omitted are
 * not visible in this declaration.
 */
export interface TournamentOptions {
    /** Weights for the correctness / quality / learned-reward components. */
    rewardWeights?: HumanRewardWeights;
    /** Evaluators to take into account. */
    evaluators?: TournamentEvaluator[];
    /** When true, prefer smaller diffs by default if diffSize is provided */
    preferSmallerDiff?: boolean;
    /** Maximum candidates to evaluate (caps O(n^2) work) */
    maxCandidates?: number;
}
/**
 * Run a dual tournament over candidate patches/agents, combining hard metrics,
 * human-like reward heuristics, and evaluator rankings.
 *
 * Only the signature is declared here; the scoring and ranking logic lives in
 * the implementation module.
 *
 * @param task - The task the candidates are competing on.
 * @param candidates - The candidates to rank.
 * @param options - Optional settings (reward weights, evaluators, diff-size
 *   preference, candidate cap).
 * @returns The tournament outcome: the task, the ranked candidates, the
 *   pairwise win table, and the evaluator breakdown.
 */
export declare function runDualTournament(task: TournamentTask, candidates: TournamentCandidate[], options?: TournamentOptions): TournamentOutcome;
//# sourceMappingURL=dualTournament.d.ts.map