/**
 * Core types for LLM providers.
 *
 * Declarations only (no runtime code): the conversation/message model,
 * streaming chunk shape, request options, tool definitions and the
 * `LLMProvider` contract.
 */

/**
 * Message roles in a conversation.
 */
export type MessageRole = 'user' | 'assistant' | 'system';

/**
 * Text content block.
 */
export interface TextBlock {
  type: 'text';
  text: string;
}

/**
 * Tool use content block (AI wants to call a tool).
 */
export interface ToolUseBlock {
  type: 'tool_use';
  /** Identifier of this tool call; `ToolResultBlock.toolUseId` refers back to it. */
  id: string;
  /** Name of the tool to call. */
  name: string;
  /** Arguments for the tool; values are untyped here and must be narrowed before use. */
  input: Record<string, unknown>;
  /**
   * Thought signature for Gemini 3 function calls.
   * Required for Gemini 3 to maintain reasoning context.
   * @see https://ai.google.dev/gemini-api/docs/thought-signatures
   */
  signature?: string;
}

/**
 * Tool result content block (result of a tool call).
 */
export interface ToolResultBlock {
  type: 'tool_result';
  /** Identifier of the tool call this result answers. */
  toolUseId: string;
  content: string;
  /** Set when the tool call failed. */
  isError?: boolean;
}

/**
 * Thinking content block (Claude's reasoning process).
 */
export interface ThinkingBlock {
  type: 'thinking';
  thinking: string;
  /**
   * Encrypted signature for verification when passing back to API.
   */
  signature?: string;
}

/**
 * Image content block (user-attached or tool-provided image for vision).
 */
export interface ImageBlock {
  type: 'image';
  /** Base64-encoded image data */
  data: string;
  /** MIME type: image/png, image/jpeg, image/webp, image/gif */
  mediaType: string;
  /** Original filename (for display and observation masking placeholder) */
  filename?: string;
  /** Image width in pixels */
  width?: number;
  /** Image height in pixels */
  height?: number;
}

/**
 * Union of all content block types.
 */
export type ContentBlock =
  | TextBlock
  | ToolUseBlock
  | ToolResultBlock
  | ThinkingBlock
  | ImageBlock;

/**
 * Content block type tags.
 *
 * Derived from `ContentBlock` so that every member of the union
 * (including `'image'`) is always represented here.
 */
export type ContentBlockType = ContentBlock['type'];

/**
 * A message in a conversation.
 */
export interface Message {
  role: MessageRole;
  /** Either plain text or a list of structured content blocks. */
  content: string | ContentBlock[];
}

/**
 * Token usage from an LLM response (returned on 'done' chunks).
 */
export interface LLMUsage {
  inputTokens: number;
  outputTokens: number;
  cacheReadTokens?: number;
  cacheCreationTokens?: number;
  /** Thinking tokens (Gemini 2.5+ models with thinking) */
  thinkingTokens?: number;
  /** Debug payload info - estimated token counts before sending to provider */
  debugPayload?: {
    systemTokens: number;
    contentsTokens: number;
    toolsTokens: number;
  };
}

/**
 * Streaming chunk types.
 *
 * A single flat shape is used for every chunk; which optional fields are
 * populated depends on `type` (see the per-field notes).
 */
export interface StreamChunk {
  type:
    | 'text'
    | 'tool_use_start'
    | 'tool_use_delta'
    | 'tool_use_end'
    | 'thinking_start'
    | 'thinking_delta'
    | 'thinking_end'
    | 'done';
  text?: string;
  toolUse?: {
    id: string;
    name: string;
    input?: Record<string, unknown>;
    /**
     * Thought signature for Gemini 3 function calls.
     * Only present on first function call in each step.
     */
    signature?: string;
  };
  /**
   * Thinking block data (for thinking_start/thinking_end).
   */
  thinking?: {
    thinking?: string;
    signature?: string;
  };
  /**
   * Token usage (only present on 'done' chunks).
   */
  usage?: LLMUsage;
  /**
   * Model that generated this response (only present on 'done' chunks).
   */
  model?: string;
  /**
   * Stop reason (only present on 'done' chunks).
   * - 'end_turn': Normal completion
   * - 'max_tokens': Hit max_tokens limit
   * - 'refusal': Model refused the request (Claude 4.5+)
   * - 'context_window_exceeded': Hit context window limit (Claude 4.5+)
   * - 'tool_use': Model wants to call a tool
   */
  stopReason?: string;
}

/**
 * Extended thinking configuration.
 *
 * @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
 */
export interface ThinkingConfig {
  /**
   * Thinking mode:
   * - 'enabled': Manual budget (legacy, deprecated on Claude 4.6)
   * - 'adaptive': Model decides when/how much to think (Claude 4.6+)
   * - 'disabled': No thinking
   */
  type: 'enabled' | 'adaptive' | 'disabled';
  /**
   * Token budget for thinking (minimum 1024, must be less than maxTokens).
   * Only used with type: 'enabled'. Ignored for 'adaptive'.
   */
  budgetTokens?: number;
  /**
   * Effort level for adaptive thinking (Claude 4.6+).
   * Controls how much the model thinks before responding.
   * - 'low': Minimal thinking, fastest response
   * - 'medium': Balanced
   * - 'high': Deep thinking, most thorough (Sonnet 4.6 default)
   * Only used with type: 'adaptive'. Ignored for 'enabled'.
   */
  effort?: 'low' | 'medium' | 'high';
}

/**
 * Options for chat requests.
 */
export interface ChatOptions {
  model?: string;
  maxTokens?: number;
  temperature?: number;
  stopSequences?: string[];
  tools?: ToolDefinition[];
  /**
   * Extended thinking configuration (Claude-specific)
   *
   * When enabled, Claude will show its reasoning process before
   * providing the final response. Requires `budgetTokens` >= 1024.
   *
   * @example
   * ```typescript
   * thinking: { type: 'enabled', budgetTokens: 10000 }
   * ```
   */
  thinking?: ThinkingConfig;
  /**
   * AbortSignal for cancelling the LLM request.
   * When aborted, the provider should stop streaming and throw/return immediately.
   */
  signal?: AbortSignal;
  /**
   * Enable prompt caching for system prompt and tools (Claude-specific)
   *
   * When enabled, the system prompt and tool definitions are cached
   * server-side, reducing token costs by up to 90% on subsequent requests.
   *
   * - Cache write: 1.25x base input cost (first request)
   * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
   *
   * @default Provider-level setting (typically true)
   */
  enablePromptCaching?: boolean;
}

/**
 * Tool definition for the LLM.
 */
export interface ToolDefinition {
  name: string;
  description: string;
  /** Object schema describing the tool's input. */
  inputSchema: {
    type: 'object';
    /** Per-property schema entries, keyed by property name. */
    properties: Record<string, unknown>;
    /** Names of properties that must be supplied. */
    required?: string[];
  };
}

/**
 * Result of a tool execution.
 */
export interface ToolResult {
  toolUseId: string;
  result: unknown;
  isError?: boolean;
}

/**
 * Interface for LLM providers - all providers must implement this.
 * Implement it to add support for a new AI model.
 *
 * Built-in providers: `ClaudeProvider`, `OpenAIProvider`, `GeminiProvider`,
 * `OllamaProvider`, `TogetherProvider`, `GroqProvider`, `FireworksProvider`,
 * `PerplexityProvider`, `OpenRouterProvider`.
 *
 * For OpenAI-compatible APIs, extend `OpenAICompatibleProvider` instead of
 * implementing this interface directly.
 */
export interface LLMProvider {
  /**
   * Provider identifier (e.g., 'claude', 'openai', 'gemini')
   */
  readonly name: string;

  /**
   * Send messages to the LLM and stream the response.
   *
   * Yields `StreamChunk` objects containing text fragments, tool calls,
   * usage stats, and other provider-specific data.
   *
   * @param messages - Conversation to send.
   * @param options - Optional per-request settings.
   * @returns An async iterable of stream chunks.
   */
  chat(messages: Message[], options?: ChatOptions): AsyncIterable<StreamChunk>;

  /**
   * Count tokens in messages (optional, provider-specific)
   *
   * @param messages - Messages to measure.
   * @returns A promise resolving to the token count.
   */
  countTokens?(messages: Message[]): Promise<number>;

  /**
   * Get the current default model ID.
   */
  getModel(): string;

  /**
   * Change the default model for subsequent calls. Same provider only.
   * Takes effect on the next chat() call, not mid-stream.
   *
   * @param modelId - The new model ID (e.g., 'claude-opus-4-20250514')
   */
  setModel(modelId: string): void;
}