/**
 * LLM Service
 *
 * Provides a clean interface for LLM interactions with proper error handling,
 * retry logic, token management, and cost tracking.
 */

/**
 * Strip NUL bytes from a CLI prompt. Node's `child_process` rejects arguments
 * that contain a NUL ("must be a string without null bytes"), and a prompt built
 * from a git diff or file content can carry one (binary-ish content, a stray
 * control byte in source). Every CLI-based provider applies this before spawning,
 * so one bad byte never aborts an otherwise-valid call (e.g. the decisions
 * extractor consolidating a diff).
 */
export declare function sanitizeCliPrompt(prompt: string): string;

/**
 * Claude Code CLI provider
 *
 * Routes LLM calls through the local `claude` CLI binary in non-interactive
 * mode (`claude -p ...`). Authentication is handled by the Claude Code session
 * (Max/Pro subscription) — no ANTHROPIC_API_KEY is required.
 */
export declare class ClaudeCodeProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private model;
  constructor(model?: string);
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  countTokens(text: string): number;
}

/**
 * Mistral Vibe CLI provider
 *
 * Routes LLM calls through the local `mistral-vibe` CLI binary (standalone, no npm).
 * No API key required — uses local LLM execution.
 * If the binary is not on PATH, set MISTRAL_VIBE_CLI to its full path.
 * The CLI is invoked as `vibe` (not `mistral-vibe`).
 */
export declare class MistralVibeProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private model;
  constructor(model?: string);
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  countTokens(text: string): number;
}

/**
 * Completion request parameters
 */
export interface CompletionRequest {
  systemPrompt: string;
  userPrompt: string;
  temperature?: number;
  maxTokens?: number;
  stopSequences?: string[];
  responseFormat?: 'text' | 'json';
  /** JSON Schema for structured output (used by OpenAI-compatible providers). */
  jsonSchema?: object;
}

/**
 * Completion response
 */
export interface CompletionResponse {
  content: string;
  usage: {
    inputTokens: number;
    outputTokens: number;
    totalTokens: number;
  };
  model: string;
  finishReason: 'stop' | 'length' | 'error';
}

/**
 * LLM provider interface
 */
export interface LLMProvider {
  name: string;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  countTokens(text: string): number;
  maxContextTokens: number;
  maxOutputTokens: number;
}

export type ProviderName =
  | 'anthropic'
  | 'openai'
  | 'openai-compat'
  | 'copilot'
  | 'gemini'
  | 'gemini-cli'
  | 'claude-code'
  | 'mistral-vibe'
  | 'cursor-agent';

/**
 * Token usage tracking
 */
export interface TokenUsage {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  requests: number;
}

/**
 * Cost tracking
 */
export interface CostTracking {
  estimatedCost: number;
  currency: string;
  // NOTE(review): type arguments reconstructed as <string, number> (presumably
  // provider name -> accumulated cost) — confirm against llm-service.ts.
  byProvider: Record<string, number>;
}

/**
 * LLM service options
 */
export interface LLMServiceOptions {
  /** Primary provider to use */
  provider?: ProviderName;
  /** Model override */
  model?: string;
  /** Custom API base URL (e.g., for local/enterprise OpenAI-compatible servers) */
  apiBase?: string;
  /** Disable SSL verification (for internal/self-signed certificates) */
  sslVerify?: boolean;
  /** Base URL for openai-compat provider (overrides OPENAI_COMPAT_BASE_URL env var) */
  openaiCompatBaseUrl?: string;
  /** Maximum retry attempts */
  maxRetries?: number;
  /** Initial retry delay in ms */
  initialDelay?: number;
  /** Maximum retry delay in ms */
  maxDelay?: number;
  /** Request timeout in ms */
  timeout?: number;
  /** Cost warning threshold in USD */
  costWarningThreshold?: number;
  /** Log directory for prompts/responses */
  logDir?: string;
  /** Enable prompt logging */
  enableLogging?: boolean;
  /** Disable response_format field in requests (for endpoints that don't support it) */
  disableResponseFormat?: boolean;
}

/**
 * Parse the number of milliseconds to wait before retrying a 429 response.
 *
 * Checks (in order):
 * 1. Standard `Retry-After` HTTP header (seconds as integer, or HTTP-date)
 * 2. `Limit resets at: YYYY-MM-DD HH:MM:SS UTC` in the response body
 *
 * Returns `undefined` when nothing useful is found so the caller can fall back
 * to its own exponential-backoff delay.
 */
export declare function parseRetryAfterMs(
  body: string,
  retryAfterHeader?: string | null,
): number | undefined;

/**
 * Exported for use in pre-flight cost estimation.
 * Look up pricing for a model ID using prefix/family matching.
 * Exact match first, then longest prefix match, then provider default.
 *
 * This is robust to minor version suffixes like "claude-sonnet-4-6-20251120"
 * matching the "claude-sonnet-4" family entry.
 */
export declare function lookupPricing(
  providerName: string,
  modelId: string,
): {
  input: number;
  output: number;
};

/**
 * Estimate token count from text (rough approximation)
 * ~4 characters per token for English text
 */
export declare function estimateTokens(text: string): number;

/**
 * Anthropic Claude provider
 */
export declare class AnthropicProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private apiKey;
  private model;
  private baseUrl;
  constructor(apiKey: string, model?: string, baseUrl?: string, sslVerify?: boolean);
  countTokens(text: string): number;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  private parseError;
}

/**
 * OpenAI provider
 */
export declare class OpenAIProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private apiKey;
  private model;
  private baseUrl;
  constructor(apiKey: string, model?: string, baseUrl?: string, sslVerify?: boolean);
  countTokens(text: string): number;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  private parseError;
}

/**
 * OpenAI-compatible provider
 *
 * Unlike `OpenAIProvider`, the base URL is a required constructor argument.
 * `disableResponseFormat` mirrors the `LLMServiceOptions` flag of the same name
 * (for endpoints that don't support the `response_format` field).
 */
export declare class OpenAICompatibleProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private apiKey;
  private model;
  private baseUrl;
  private disableResponseFormat;
  constructor(apiKey: string, baseUrl: string, model?: string, disableResponseFormat?: boolean);
  countTokens(text: string): number;
  /**
   * Fetch available models from the API endpoint
   */
  private fetchAvailableModels;
  /**
   * Get known models for common API endpoints when /models is not available
   */
  private getKnownModelsForEndpoint;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
}

/**
 * GitHub Copilot provider via copilot-api proxy.
 * Requires a running copilot-api proxy (https://github.com/ericc-ch/copilot-api)
 * which exposes an OpenAI-compatible /v1/chat/completions endpoint.
 *
 * Required env vars:
 *   COPILOT_API_BASE_URL — Base URL of the copilot-api proxy (default: http://localhost:4141/v1)
 *
 * Optional env vars:
 *   COPILOT_API_KEY — API key if the proxy requires auth (default: "copilot")
 */
export declare class CopilotProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private apiKey;
  private model;
  private baseUrl;
  constructor(baseUrl: string, model?: string, apiKey?: string);
  countTokens(text: string): number;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
}

/**
 * Gemini CLI provider
 *
 * Routes LLM calls through the local `gemini` CLI binary in non-interactive
 * mode (`gemini -p ...`). Authentication is handled by the Google account
 * session — no GEMINI_API_KEY is required.
 * If the binary is not on PATH, set GEMINI_CLI to its full path.
 */
export declare class GeminiCLIProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private model;
  constructor(model?: string);
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  countTokens(text: string): number;
}

/**
 * Cursor Agent CLI provider
 *
 * Routes LLM calls through the Cursor Agent CLI in print mode (`-p`, JSON output).
 * Authentication is handled by Cursor (see Cursor CLI headless documentation) —
 * e.g. `cursor auth login` or `CURSOR_API_KEY` — not ANTHROPIC_API_KEY / OPENAI_API_KEY.
 * If the binary is not on PATH, set `CURSOR_AGENT_CLI` to its full path.
 */
export declare class CursorAgentProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private model;
  constructor(model?: string);
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  countTokens(text: string): number;
}

/**
 * Google Gemini provider
 */
export declare class GeminiProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private apiKey;
  private model;
  private baseUrl;
  constructor(apiKey: string, model?: string);
  countTokens(text: string): number;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
}

/**
 * Mock provider for testing
 */
export declare class MockLLMProvider implements LLMProvider {
  name: string;
  maxContextTokens: number;
  maxOutputTokens: number;
  private responses;
  private defaultResponse;
  callHistory: CompletionRequest[];
  shouldFail: boolean;
  failCount: number;
  private currentFailCount;
  setResponse(promptContains: string, response: string): void;
  setDefaultResponse(response: string): void;
  countTokens(text: string): number;
  generateCompletion(request: CompletionRequest): Promise<CompletionResponse>;
  reset(): void;
}

/**
 * LLM Service - main interface for LLM interactions
 */
export declare class LLMService {
  private provider;
  private retryConfig;
  private options;
  private tokenUsage;
  private costTracking;
  private requestLog;
  constructor(provider: LLMProvider, options?: LLMServiceOptions);
  /**
   * Get the provider name
   */
  getProviderName(): string;
  /**
   * Get maximum context tokens for the provider
   */
  getMaxContextTokens(): number;
  /**
   * Count tokens in text
   */
  countTokens(text: string): number;
  /**
   * Get current token usage
   */
  getTokenUsage(): TokenUsage;
  /**
   * Get current cost tracking
   */
  getCostTracking(): CostTracking;
  /**
   * Reset usage tracking
   */
  resetTracking(): void;
  /**
   * Generate a completion with retry logic
   */
  complete(request: CompletionRequest): Promise<CompletionResponse>;
  /**
   * Generate a completion expecting JSON response
   *
   * NOTE(review): the type parameter and resolved type were reconstructed; `T`
   * is the caller-asserted shape of the result and defaults to `unknown` so
   * unannotated callers must narrow before use — confirm against llm-service.ts.
   */
  completeJSON<T = unknown>(request: CompletionRequest, schema?: object): Promise<T>;
  /**
   * Execute request with timeout
   */
  private executeWithTimeout;
  /**
   * Update tracking after a successful request
   */
  private updateTracking;
  /**
   * Calculate cost for a response
   */
  private calculateCost;
  /**
   * Log request/response
   */
  private logRequest;
  /**
   * Redact potential secrets from request
   */
  private redactSecrets;
  /**
   * Simple schema validation
   */
  private validateSchema;
  /**
   * Save logs to disk
   *
   * NOTE(review): resolved type reconstructed as `void` — confirm against llm-service.ts.
   */
  saveLogs(): Promise<void>;
  /**
   * Sleep helper
   */
  private sleep;
}

/**
 * Create an LLM service with the specified provider
 */
export declare function createLLMService(options?: LLMServiceOptions): LLMService;

/**
 * Create an LLM service with a mock provider (for testing)
 */
export declare function createMockLLMService(options?: LLMServiceOptions): {
  service: LLMService;
  provider: MockLLMProvider;
};
//# sourceMappingURL=llm-service.d.ts.map