/**
 * @license
 * Copyright 2025 Vybestack LLC
 * SPDX-License-Identifier: Apache-2.0
 *
 * @plan PLAN-20251211issue486b
 * Phase 1: LoadBalancingProvider Skeleton Implementation
 *
 * This provider wraps multiple sub-profile configurations and delegates
 * requests to the appropriate provider based on a round-robin strategy.
 * Selection happens at REQUEST TIME, not profile load time.
 */
import { IProvider, GenerateChatOptions, ProviderToolset } from './IProvider.js';
import { IModel } from './IModel.js';
import { IContent } from '../services/history/IContent.js';
import { ProviderManager } from './ProviderManager.js';
import type { Profile } from '../types/modelParams.js';

/**
 * Sub-profile configuration for load balancing
 */
export interface LoadBalancerSubProfile {
  name: string;
  providerName: string;
  modelId?: string;
  baseURL?: string;
  authToken?: string;
}

/**
 * Load balancing provider configuration
 * @plan PLAN-20251211issue486c - Updated to support ResolvedSubProfile
 * @plan PLAN-20251212issue488 - Added failover strategy
 */
export interface LoadBalancingProviderConfig {
  profileName: string;
  strategy: 'round-robin' | 'failover';
  /** Either all resolved sub-profiles or all legacy sub-profiles (not a mixed array). */
  subProfiles: ResolvedSubProfile[] | LoadBalancerSubProfile[];
  lbProfileEphemeralSettings?: Record<string, unknown>;
}

/**
 * Backend performance metrics
 * @plan PLAN-20251212issue489 - Phase 1
 */
export interface BackendMetrics {
  requests: number;
  successes: number;
  failures: number;
  timeouts: number;
  tokens: number;
  totalLatencyMs: number;
  avgLatencyMs: number;
}

/**
 * Circuit breaker state for a backend
 * @plan PLAN-20251212issue489 - Phase 1
 */
export interface CircuitBreakerState {
  state: 'closed' | 'open' | 'half-open';
  failures: Array<{
    timestamp: number;
    error: Error;
  }>;
  openedAt?: number;
  lastAttempt?: number;
}

/**
 * Load balancer statistics interface
 */
export interface LoadBalancerStats {
  profileName: string;
  totalRequests: number;
  lastSelected: string | null;
  profileCounts: Record<string, number>;
}

/**
 * Extended load balancer statistics with metrics
 * @plan PLAN-20251212issue489 - Phase 1
 */
export interface ExtendedLoadBalancerStats extends LoadBalancerStats {
  backendMetrics: Record<string, BackendMetrics>;
  circuitBreakerStates: Record<string, CircuitBreakerState>;
  currentTPM: Record<string, number>;
}

/**
 * Resolved sub-profile with all settings needed for provider instantiation
 * @plan PLAN-20251211issue486c
 */
export interface ResolvedSubProfile {
  name: string;
  providerName: string;
  model: string;
  baseURL?: string;
  authToken?: string;
  authKeyfile?: string;
  ephemeralSettings: Record<string, unknown>;
  modelParams: Record<string, unknown>;
}

/**
 * Type guard to identify load balancer profile format
 *
 * Note: the declared return type is plain `boolean`, not a type predicate,
 * so a `true` result does not narrow `profile` at compile time.
 * @plan PLAN-20251211issue486c
 */
export declare function isLoadBalancerProfileFormat(profile: Profile): boolean;

/**
 * Type guard to identify ResolvedSubProfile
 * @plan PLAN-20251211issue486c
 */
export declare function isResolvedSubProfile(
  profile: ResolvedSubProfile | LoadBalancerSubProfile,
): profile is ResolvedSubProfile;

/**
 * Load balancing provider that distributes requests across multiple sub-profiles
 */
export declare class LoadBalancingProvider implements IProvider {
  private readonly config;
  private readonly providerManager;
  readonly name = "load-balancer";
  private roundRobinIndex;
  private readonly logger;
  private stats;
  private lastSelected;
  private totalRequests;
  private circuitBreakerStates;
  private tpmBuckets;
  private backendMetrics;
  private currentFailoverIndex;

  constructor(
    config: LoadBalancingProviderConfig,
    providerManager: ProviderManager,
  );

  /**
   * Validate the load balancing configuration
   * @plan PLAN-20251211issue486c - Updated to handle ResolvedSubProfile
   */
  private validateConfig;

  /**
   * Select the next sub-profile using round-robin strategy
   * Returns the sub-profile at the current index, then increments and wraps around
   * @plan PLAN-20251211issue486c - Updated to return union type
   */
  selectNextSubProfile(): ResolvedSubProfile | LoadBalancerSubProfile;

  /**
   * Get available models (stub for Phase 1)
   * Will be implemented in later phases to aggregate models from all sub-profiles
   */
  getModels(): Promise<IModel[]>;

  /**
   * Generate chat completion by delegating to selected sub-profile provider
   * Phase 3c: Request Delegation with Settings Merge Implementation
   * @plan PLAN-20251211issue486c
   */
  generateChatCompletion(
    options: GenerateChatOptions,
  ): AsyncIterableIterator<IContent>;
  generateChatCompletion(
    content: IContent[],
    tools?: ProviderToolset,
  ): AsyncIterableIterator<IContent>;

  /**
   * Get default model (stub for Phase 1)
   * Will be implemented in later phases to return first sub-profile's model
   */
  getDefaultModel(): string;

  /**
   * Get server tools (stub for Phase 1)
   * Will be implemented in later phases to aggregate tools from delegate providers
   */
  getServerTools(): string[];

  /**
   * Invoke server tool (stub for Phase 1)
   * Will be implemented in later phases to delegate to appropriate provider
   */
  invokeServerTool(
    toolName: string,
    _params: unknown,
    _config?: unknown,
    _signal?: AbortSignal,
  ): Promise<unknown>;

  /**
   * Increment stats for a sub-profile
   * Phase 5: Stats Integration
   */
  private incrementStats;

  /**
   * Get load balancer statistics
   * Phase 5: Stats Integration
   * @plan PLAN-20251212issue489 - Phase 2: Updated to return ExtendedLoadBalancerStats
   */
  getStats(): ExtendedLoadBalancerStats;

  /**
   * Reset statistics (optional method for testing/debugging)
   * Phase 5: Stats Integration
   */
  resetStats(): void;

  /**
   * Get current failover index (for testing/debugging)
   * @plan PLAN-20251217issue902 - Sticky failover behavior
   */
  getCurrentFailoverIndex(): number;

  /**
   * Reset failover index to 0 (for testing)
   * @plan PLAN-20251217issue902 - Sticky failover behavior
   */
  resetFailoverIndex(): void;

  /**
   * Extract failover settings from ephemeral settings
   * @plan PLAN-20251212issue488
   * @plan PLAN-20251212issue489 - Phase 1: Extended with advanced settings
   */
  private extractFailoverSettings;

  /**
   * Determine if error should trigger failover
   * @plan PLAN-20251212issue488
   */
  private shouldFailover;

  /**
   * Check if error should trigger immediate failover (no retry)
   * @plan PLAN-20251217issue902 - Sticky failover behavior
   *
   * These status codes indicate the backend cannot serve requests
   * and retrying would be futile:
   * - 429: Rate limited
   * - 401: Unauthorized (per Issue #902 spec; OAuth bucket failover has
   *   separate auto-renew logic that doesn't apply to load balancer)
   * - 402: Payment required
   * - 403: Forbidden
   */
  private isImmediateFailoverError;

  /**
   * Build resolved options for a sub-profile
   * @plan PLAN-20251212issue488
   */
  private buildResolvedOptions;

  /**
   * Initialize circuit breaker state for a backend
   * @plan PLAN-20251212issue489 - Phase 2
   */
  private initCircuitBreakerState;

  /**
   * Check if backend is healthy (circuit breaker check)
   * @plan PLAN-20251212issue489 - Phase 2
   */
  private isBackendHealthy;

  /**
   * Record successful backend request (circuit breaker)
   * @plan PLAN-20251212issue489 - Phase 2
   */
  private recordBackendSuccess;

  /**
   * Record backend failure (circuit breaker)
   * @plan PLAN-20251212issue489 - Phase 2
   */
  private recordBackendFailure;

  /**
   * Wrap iterator with timeout for first chunk
   * @plan PLAN-20251212issue489 - Phase 3
   */
  private wrapWithTimeout;

  /**
   * Check if error is a timeout error
   * @plan PLAN-20251212issue489 - Phase 3
   */
  private isTimeoutError;

  /**
   * Update TPM tracking with new tokens
   * @plan PLAN-20251212issue489 - Phase 4
   */
  private updateTPM;

  /**
   * Calculate TPM for a profile using 5-minute rolling window
   * @plan PLAN-20251212issue489 - Phase 4
   */
  private calculateTPM;

  /**
   * Check if backend should be skipped due to low TPM
   * @plan PLAN-20251212issue489 - Phase 4
   */
  private shouldSkipOnTPM;

  /**
   * Extract token count from response chunks
   * @plan PLAN-20251212issue489 - Phase 4/5
   */
  private extractTokenCount;

  /**
   * Initialize backend metrics
   * @plan PLAN-20251212issue489 - Phase 5
   */
  private initBackendMetrics;

  /**
   * Record request start and return start time
   * @plan PLAN-20251212issue489 - Phase 5
   */
  private recordRequestStart;

  /**
   * Record successful request
   * @plan PLAN-20251212issue489 - Phase 5
   */
  private recordRequestSuccess;

  /**
   * Record failed request
   * @plan PLAN-20251212issue489 - Phase 5
   */
  private recordRequestFailure;

  /**
   * Execute with failover strategy
   * @plan PLAN-20251212issue488
   * @plan PLAN-20251212issue489 - Phase 2: Updated with circuit breaker integration
   * @plan PLAN-20251217issue902 - Sticky failover: start from last successful backend
   */
  private executeWithFailover;

  /**
   * Get auth token - required by ProviderManager.normalizeRuntimeInputs validation
   * @plan:PLAN-20251211issue486b - Auth token resolution
   *
   * The load-balancer doesn't use this token directly; it passes authToken
   * via options.resolved to the delegate provider. This method exists to
   * satisfy ProviderManager's canResolveAuth check so it doesn't fail
   * validation before delegation can happen.
   */
  getAuthToken(): Promise<string>;
}