/**
 * SolverBandit - Thompson Sampling bandit for AgentDB decisions (ADR-010)
 *
 * General-purpose multi-armed bandit inspired by @ruvector/rvf-solver's
 * 18-bucket architecture. Provides explore/exploit decisions for any
 * context-dependent selection problem (skills, patterns, algorithms, tiers).
 *
 * Architecture:
 * - Contextual: separate Beta distributions per (context, arm) pair
 * - Two-signal: tracks both success rate (Beta) and cost (EMA)
 * - Serializable: full state can be persisted to JSON for cross-session learning
 */

/** Per-arm statistics, kept separately for every (context, arm) pair. */
export interface BanditArmStats {
  /** First shape parameter of the arm's Beta(alpha, beta) distribution. */
  alpha: number;
  /** Second shape parameter of the arm's Beta(alpha, beta) distribution. */
  beta: number;
  /** Pull counter for this arm (presumably incremented per recorded reward; confirm in the implementation). */
  pulls: number;
  /** Accumulated reward for this arm. */
  totalReward: number;
  /** Exponential moving average of the cost signal for this arm. */
  costEma: number;
}

/** Bandit configuration. Every field is optional; documented defaults apply when omitted. */
export interface BanditConfig {
  /** Cost weight in score calculation (default: 0.01) */
  costWeight?: number;
  /** EMA decay factor for cost tracking (default: 0.1) */
  costDecay?: number;
  /** Exploration bonus for under-sampled arms (default: 0.1) */
  explorationBonus?: number;
}

/** Aggregate statistics across the whole bandit. */
export interface BanditStats {
  /** Number of context buckets. */
  contexts: number;
  /** Number of arms summed over all contexts. */
  totalArms: number;
  /** Pulls summed over all arms. */
  totalPulls: number;
  /** Reward summed over all arms. */
  totalReward: number;
}

/** Serialized, JSON-safe snapshot of a bandit. */
export interface BanditState {
  /** Schema version of the serialized form. */
  version: 1;
  /** Fully resolved configuration (no optional fields left unset). */
  config: Required<BanditConfig>;
  /** Arm statistics keyed first by context key, then by arm key. */
  contexts: Record<string, Record<string, BanditArmStats>>;
}

/**
 * Thompson Sampling bandit with contextual arms.
 *
 * Usage:
 *   const bandit = new SolverBandit();
 *   const arm = bandit.selectArm('code_review', ['skill-a', 'skill-b', 'skill-c']);
 *   // ... execute the selected arm ...
 *   bandit.recordReward('code_review', arm, 0.85);
 */
export declare class SolverBandit {
  private contexts;
  private config;

  /**
   * @param config - Optional overrides for the default bandit configuration.
   */
  constructor(config?: BanditConfig);

  /**
   * Select the best arm for a given context using Thompson Sampling.
   *
   * For each candidate arm, samples from its Beta(alpha, beta) distribution
   * and subtracts a cost penalty. Returns the arm with the highest score.
   * Unknown arms get an exploration bonus.
   *
   * @param contextKey - The context bucket (e.g., task type)
   * @param armKeys - Candidate arm keys to choose between
   * @returns The key of the selected arm
   */
  selectArm(contextKey: string, armKeys: string[]): string;

  /**
   * Record the outcome of pulling an arm.
   *
   * @param contextKey - The context bucket (e.g., task type)
   * @param armKey - The arm that was pulled (e.g., skill name)
   * @param reward - Success signal in [0, 1]
   * @param cost - Optional cost signal (latency, tokens, etc.)
   */
  recordReward(contextKey: string, armKey: string, reward: number, cost?: number): void;

  /**
   * Rerank a list of candidates using bandit scores.
   *
   * The declared return type is `string[]`, so the result is presumably the
   * candidate arm keys (not numeric indices) ordered by Thompson-sampled
   * score, best first. Confirm against the implementation.
   *
   * @param contextKey - The context bucket (e.g., task type)
   * @param armKeys - Candidate arm keys to rank
   */
  rerank(contextKey: string, armKeys: string[]): string[];

  /**
   * Get arm stats for a specific context.
   *
   * @returns The arm's statistics, or `null` (presumably when nothing has
   *   been recorded for that context/arm pair; confirm in the implementation).
   */
  getArmStats(contextKey: string, armKey: string): BanditArmStats | null;

  /** Get aggregate statistics */
  getStats(): BanditStats;

  /** Serialize to JSON-safe state */
  serialize(): BanditState;

  /** Restore from serialized state */
  static deserialize(state: BanditState): SolverBandit;

  /** Reset all learned state */
  reset(): void;

  /**
   * Sample from Beta(a, b) using the Jöhnk algorithm.
   * Fast approximation for typical bandit parameters.
   *
   * NOTE(review): the class also declares sampleGamma/sampleNormal helpers,
   * which suggests a Gamma-ratio construction rather than Jöhnk. Confirm the
   * algorithm name against the implementation.
   */
  private sampleBeta;

  /**
   * Sample from Gamma(shape, 1) using Marsaglia & Tsang's method.
   */
  private sampleGamma;

  /** Box-Muller normal sample */
  private sampleNormal;
}
//# sourceMappingURL=SolverBandit.d.ts.map