/**
 * Token budget enforcement for LLM API calls.
 *
 * Provides pre-estimation, budget tracking, context window management,
 * and graceful truncation.
 */
import type {
    Message,
    LLMClient,
    CompletionOptions,
    StreamingOptions,
    StreamingResult,
    ProviderInfo,
} from './client.js';

/**
 * Token budget configuration options.
 */
export interface TokenBudgetOptions {
    /** Maximum total tokens for the session (input + output) */
    maxTotalTokens?: number;
    /** Maximum input tokens per request */
    maxInputTokensPerRequest?: number;
    /** Maximum output tokens per request */
    maxOutputTokensPerRequest?: number;
    /** Warning threshold as a fraction (0-1) - warn once this fraction of the budget is used */
    warningThreshold?: number;
    /** Reserve tokens for output (don't use entire context window for input) */
    outputReserve?: number;
    /** Callback when approaching budget limit */
    onBudgetWarning?: (used: number, total: number, percentage: number) => void;
    /** Callback when budget is exceeded */
    onBudgetExceeded?: (used: number, total: number) => void;
}

/**
 * Token estimation result.
 */
export interface TokenEstimate {
    /** Estimated token count */
    tokens: number;
    /** Whether this exceeds the context window */
    exceedsContext: boolean;
    /** Context window size for the model */
    contextWindow: number;
    /** Available tokens after this input */
    availableForOutput: number;
}

/**
 * Budget status.
 */
export interface BudgetStatus {
    /** Total tokens used */
    totalUsed: number;
    /** Total budget */
    totalBudget: number;
    /** Percentage of budget used */
    percentageUsed: number;
    /** Whether warning threshold is exceeded */
    warningTriggered: boolean;
    /** Whether budget is exceeded */
    budgetExceeded: boolean;
    /** Remaining tokens */
    remaining: number;
}

/**
 * Estimate tokens from text using a character-based heuristic.
 * This is a rough approximation (~4 characters per token for English text).
 *
 * @param text - Text to estimate.
 * @returns Estimated token count.
 */
export declare function estimateTokens(text: string): number;

/**
 * Estimate tokens for a message array.
 *
 * @param messages - Messages to estimate.
 * @returns Estimated token count.
 */
export declare function estimateMessagesTokens(messages: Message[]): number;

/**
 * Get context window size for a model.
 *
 * @param model - Model identifier.
 * @returns Context window size for the model.
 */
export declare function getContextWindow(model: string): number;

/**
 * Estimate tokens and check against context window.
 *
 * @param text - Raw text or a message array to estimate.
 * @param model - Model identifier used to look up the context window.
 * @param outputReserve - Optional number of tokens to reserve for output.
 * @returns The estimate together with context-window information.
 */
export declare function estimateWithContext(
    text: string | Message[],
    model: string,
    outputReserve?: number,
): TokenEstimate;

/**
 * Truncate messages to fit within token budget using sliding window.
 * Keeps system message and most recent messages.
 *
 * @param messages - Messages to truncate.
 * @param maxTokens - Token budget the result should fit within.
 * @param options - Optional `keepSystemMessage` and `minMessages` settings.
 * @returns The truncated message list.
 */
export declare function truncateMessages(
    messages: Message[],
    maxTokens: number,
    options?: {
        keepSystemMessage?: boolean;
        minMessages?: number;
    },
): Message[];

/**
 * Truncate text to approximately fit within token budget.
 *
 * @param text - Text to truncate.
 * @param maxTokens - Approximate token budget for the result.
 * @returns The truncated text.
 */
export declare function truncateText(text: string, maxTokens: number): string;

/**
 * Token budget tracker for a session.
 */
export declare class TokenBudgetTracker {
    private readonly options;
    private readonly onBudgetWarning?;
    private readonly onBudgetExceeded?;
    private totalInputTokens;
    private totalOutputTokens;
    private warningEmitted;
    constructor(options?: TokenBudgetOptions);
    /**
     * Record token usage.
     */
    recordUsage(inputTokens: number, outputTokens: number): void;
    /**
     * Get current budget status.
     */
    getStatus(): BudgetStatus;
    /**
     * Check if a request would exceed the budget.
     */
    wouldExceedBudget(estimatedInputTokens: number, expectedOutputTokens?: number): boolean;
    /**
     * Get maximum safe input tokens for next request.
     */
    getMaxSafeInputTokens(): number;
    /**
     * Reset the tracker.
     */
    reset(): void;
}

/**
 * Budget-aware error.
 *
 * Exposes `used`, `budget`, and `requested` as read-only properties.
 */
export declare class TokenBudgetExceededError extends Error {
    readonly used: number;
    readonly budget: number;
    readonly requested: number;
    constructor(used: number, budget: number, requested: number);
}

/**
 * Wrapper that enforces token budget on an LLM client.
 *
 * NOTE(review): the type arguments on the methods below were missing from this
 * declaration file (bare `Promise`, undeclared `T`), which does not type-check.
 * `Promise<StreamingResult>` matches the otherwise-unused `StreamingResult`
 * import; `Promise<string>` for `chat`/`complete` is assumed - confirm against
 * the `LLMClient` interface in `./client.js`.
 */
export declare class BudgetEnforcedLLMClient implements LLMClient {
    private readonly client;
    private readonly tracker;
    private readonly model;
    private readonly strict;
    constructor(
        client: LLMClient,
        options?: TokenBudgetOptions & {
            model?: string;
            strict?: boolean;
        },
    );
    getProviderInfo(): ProviderInfo;
    chat(messages: Message[], options?: CompletionOptions): Promise<string>;
    complete(prompt: string, options?: CompletionOptions): Promise<string>;
    stream(prompt: string, options?: StreamingOptions): Promise<StreamingResult>;
    streamChat(messages: Message[], options?: StreamingOptions): Promise<StreamingResult>;
    parseJSON<T>(response: string): T;
    /**
     * Get current budget status.
     */
    getBudgetStatus(): BudgetStatus;
    /**
     * Reset the budget tracker.
     */
    resetBudget(): void;
}

/**
 * Create a budget-enforced wrapper around an LLM client.
 *
 * @param client - Client to wrap.
 * @param options - Budget options plus optional `model` and `strict` settings.
 * @returns The budget-enforcing wrapper.
 */
export declare function withTokenBudget(
    client: LLMClient,
    options?: TokenBudgetOptions & {
        model?: string;
        strict?: boolean;
    },
): BudgetEnforcedLLMClient;
//# sourceMappingURL=token-budget.d.ts.map