/**
 * ClaudeProvider - Anthropic Claude API implementation
 *
 * @example
 * ```typescript
 * const provider = new ClaudeProvider({
 *   apiKey: process.env.ANTHROPIC_API_KEY,
 * });
 *
 * const agent = new Agent({ provider });
 * ```
 */
import type { LLMProvider, Message, ChatOptions, StreamChunk } from './types.js';

/**
 * Configuration for ClaudeProvider
 */
export interface ClaudeProviderConfig {
  /**
   * Anthropic API key
   */
  apiKey: string;

  /**
   * Default model to use
   * @default 'claude-sonnet-4-6'
   */
  model?: string;

  /**
   * Base URL for API (useful for proxies)
   */
  baseURL?: string;

  /**
   * Default max tokens
   * @default 4096
   */
  maxTokens?: number;

  /**
   * Enable prompt caching for system prompt and tools.
   *
   * When enabled, the system prompt and tool definitions are cached
   * server-side, reducing token costs by up to 90% on subsequent requests.
   *
   * - Cache write: 1.25x base input cost (first request)
   * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
   *
   * @default true
   */
  enablePromptCaching?: boolean;

  /**
   * Enable token-efficient tool use (Anthropic beta).
   * Sends compact tool representation, reducing input tokens.
   * No-op for Claude 4+ (already default).
   * @default true
   */
  enableTokenEfficientTools?: boolean;

  /**
   * Optional token estimator function (e.g., tiktoken).
   * When provided, debug payload reports token counts instead of char-based estimates.
   * Fallback: Math.ceil(text.length / 4)
   */
  estimateTokens?: (text: string) => number;

  /**
   * Enable extended context window (1M tokens for supported Claude models).
   * Sends `context-1m-2025-08-07` beta header.
   * Long-context pricing applies above 200K tokens per request.
   * @default false
   */
  enableExtendedContext?: boolean;
}

/**
 * LLM provider for Anthropic's Claude models (Opus, Sonnet, Haiku).
 *
 * Supports streaming, tool use, prompt caching, extended context (1M tokens),
 * and token-efficient tool schemas.
 *
 * @example
 * ```typescript
 * const provider = new ClaudeProvider({
 *   apiKey: process.env.ANTHROPIC_API_KEY,
 *   model: 'claude-sonnet-4-20250514',
 * });
 *
 * const agent = new Agent({
 *   provider,
 *   systemPrompt: 'You are a helpful assistant.',
 * });
 * ```
 *
 * @example
 * ```typescript
 * // Using the factory function
 * const provider = createClaudeProvider({
 *   apiKey: 'sk-ant-...',
 *   enableExtendedContext: true, // 1M token context
 * });
 * ```
 */
export declare class ClaudeProvider implements LLMProvider {
  readonly name = "claude";
  private readonly client;
  private defaultModel;
  private readonly defaultMaxTokens;
  private readonly enablePromptCaching;
  private readonly enableTokenEfficientTools;
  private readonly enableExtendedContext;
  private readonly estimateTokensFn;

  constructor(config: ClaudeProviderConfig);

  getModel(): string;

  setModel(modelId: string): void;

  /**
   * Send messages and stream the response
   *
   * @param messages - Conversation messages to send.
   * @param options - Optional per-call chat options.
   * @returns An async iterable that yields the response as stream chunks.
   */
  chat(messages: Message[], options?: ChatOptions): AsyncIterable<StreamChunk>;

  /**
   * Count tokens in messages using tiktoken (cl100k_base encoding)
   *
   * @param messages - Messages whose tokens should be counted.
   * @returns A promise resolving to the token count.
   */
  countTokens(messages: Message[]): Promise<number>;

  /**
   * Check if a model is Claude 4+ (token-efficient tools are built-in, no header needed).
   */
  private isClaude4Plus;

  /**
   * Build request options with optional abort signal and beta headers.
   * Combines multiple beta features (comma-separated per Anthropic API spec).
   */
  private buildRequestOptions;

  /**
   * Convert our Message format to Anthropic's format
   */
  private convertMessages;

  /**
   * Convert content to Anthropic's content block format
   */
  private convertContent;

  /**
   * Convert our ToolDefinition to Anthropic's Tool format
   */
  private convertTools;

  /**
   * Convert thinking config to Anthropic API format.
   *
   * Supports both legacy (enabled + budget_tokens) and adaptive (Claude 4.6+) modes.
   * Adaptive mode uses output_config.effort instead of budget_tokens.
   */
  private convertThinking;

  /**
   * Wrap system prompt in array format with cache_control for prompt caching.
   *
   * When enabled, the system prompt is cached server-side for 5 minutes,
   * reducing token costs by up to 90% on subsequent requests.
   */
  private wrapSystemPromptWithCache;

  /**
   * Add cache_control breakpoint to conversation messages.
   *
   * Caches the conversation history prefix (all messages except the most recent turn).
   * This avoids re-processing the entire history on each API call.
   * Only applies when there are enough messages to benefit (>= 4 messages = 2+ turns).
   *
   * Strategy: place cache_control on the last content block of the second-to-last
   * user message. This caches system + tools + all messages up to that point.
   * Only the most recent user message is uncached (and processed at full cost).
   */
  private addCacheControlToMessages;

  /**
   * Add cache_control to the last tool definition.
   *
   * This caches ALL tool definitions as a single prefix (tools are
   * cached cumulatively up to the cache_control marker).
   */
  private addCacheControlToLastTool;

  /**
   * Process a stream event into StreamChunks
   */
  private processEvent;

  /**
   * Process a content block delta event
   */
  private processDelta;

  /**
   * Map Anthropic SDK errors to ProviderError
   */
  private mapError;
}

/**
 * Create a ClaudeProvider with API key from environment
 *
 * @param config - Optional partial configuration; every field of
 *   {@link ClaudeProviderConfig} may be omitted.
 * @returns A new ClaudeProvider instance.
 */
export declare function createClaudeProvider(config?: Partial<ClaudeProviderConfig>): ClaudeProvider;