import type { IToolRuntime } from './toolRuntime.js';
import {
  type ConversationMessage,
  type LLMProvider,
  type ProviderModelInfo,
  type ProviderUsage,
} from './types.js';
import { ContextManager } from './contextManager.js';
import { type PasteSummary } from './multilinePasteHandler.js';

/**
 * Optional hooks an embedding UI/orchestrator can register with the agent.
 * Every member is optional; only the hooks that are provided can be invoked.
 */
export interface AgentCallbacks {
  /** Called with an assistant message and metadata describing it (final or not, streamed or not, ...). */
  onAssistantMessage?(content: string, metadata: AssistantMessageMetadata): void;
  /** Called with a streamed chunk; `type` distinguishes regular content from reasoning output. */
  onStreamChunk?(chunk: string, type?: 'content' | 'reasoning'): void;
  /** Called when real token usage is received from the provider during streaming */
  onUsage?(usage: ProviderUsage): void;
  /**
   * Called when messages were pruned from the context.
   * NOTE(review): the value type of `stats` was missing from the declaration;
   * `unknown` is the conservative reconstruction - confirm against agent.ts.
   */
  onContextPruned?(removedCount: number, stats: Record<string, unknown>): void;
  /** Called when auto context squishing occurs in background */
  onContextSquishing?(message: string): void;
  /** Called when context recovery from overflow is attempted */
  onContextRecovery?(attempt: number, maxAttempts: number, message: string): void;
  /** Called when agent continues after context recovery - useful for updating UI */
  onContinueAfterRecovery?(): void;
  /** Called when multi-line paste is detected - displays summary instead of full content */
  onMultilinePaste?(summary: string, metadata: PasteSummary): void;
  /** Called when verification should be triggered for a final response */
  onVerificationNeeded?(response: string, context: VerificationCallbackContext): void;
  /** Called when the operation is cancelled by the user */
  onCancelled?(): void;
  /**
   * Called when tool execution starts - useful for updating activity status.
   * NOTE(review): the value type of `args` was missing from the declaration;
   * `unknown` is the conservative reconstruction - confirm against agent.ts.
   */
  onToolExecution?(toolName: string, isStart: boolean, args?: Record<string, unknown>): void;
  /** Called when the agent generates an explanation for a completed edit */
  onEditExplanation?(payload: EditExplanationPayload): void;
  /**
   * Called IMMEDIATELY when a user request is received, BEFORE any provider call.
   * Useful for updating UI activity without showing filler messages.
   */
  onRequestReceived?(requestPreview: string): void;
  /**
   * Called BEFORE the first tool call in a turn, allowing UI to update activity state.
   * @param toolNames - Names of tools about to be called
   * @param hasModelNarration - Whether the model provided narration/thinking before tools
   * @returns Optional acknowledgement text to display (if model didn't provide narration)
   */
  onBeforeFirstToolCall?(toolNames: string[], hasModelNarration: boolean): string | undefined;
  /**
   * Called when the agent encounters a transient error and will retry.
   * @param attempt - Current retry attempt number
   * @param maxAttempts - Maximum retry attempts
   * @param error - The error that triggered the retry
   */
  onRetrying?(attempt: number, maxAttempts: number, error: Error): void;
}

/** One entry of the list returned by `AgentRuntime.drainToolExecutions()`. */
export interface ToolExecutionRecord {
  /** Tool name. */
  name: string;
  /** Whether the execution is recorded as successful. */
  success: boolean;
  /** Whether the execution is recorded as having produced output. */
  hasOutput: boolean;
}

/** Context handed to `AgentCallbacks.onVerificationNeeded`. */
export interface VerificationCallbackContext {
  /** Working directory for verification */
  workingDirectory: string;
  /** Recent conversation history for context */
  conversationHistory: string[];
  /** Provider ID */
  provider: string;
  /** Model ID */
  model: string;
}

/** Metadata accompanying a message delivered through `AgentCallbacks.onAssistantMessage`. */
export interface AssistantMessageMetadata {
  /** Whether this message is the final one. */
  isFinal: boolean;
  /** Elapsed time in milliseconds (per the `Ms` suffix), when available. */
  elapsedMs?: number;
  /** Provider-reported usage, when available. */
  usage?: ProviderUsage | null;
  /**
   * Context statistics, when available.
   * NOTE(review): the value type was missing from the declaration; `unknown`
   * is the conservative reconstruction - confirm against agent.ts.
   */
  contextStats?: Record<string, unknown> | null;
  /** True if content was already displayed via streaming chunks */
  wasStreamed?: boolean;
  /** Hint to UI to suppress rendering of internal/system filler messages */
  suppressDisplay?: boolean;
}

/** Payload delivered through `AgentCallbacks.onEditExplanation`. */
export interface EditExplanationPayload {
  /** Explanation text. */
  explanation: string;
  /** Files the explanation refers to. */
  files: string[];
  /** Name of the tool the explanation is associated with. */
  toolName: string;
  /** Identifier of the originating tool call, when known. */
  toolCallId?: string;
}

/** Constructor options for `AgentRuntime` (module-private; not exported). */
interface AgentOptions {
  provider: LLMProvider;
  toolRuntime: IToolRuntime;
  systemPrompt: string;
  callbacks?: AgentCallbacks;
  contextManager?: ContextManager;
  /** Provider ID for verification context */
  providerId?: string;
  /** Model ID for verification context */
  modelId?: string;
  /** Working directory for verification */
  workingDirectory?: string;
  /** Whether to generate and surface explanations after edit tools complete */
  explainEdits?: boolean;
}

/**
 * Ambient declaration of the agent runtime class.
 *
 * Private members appear here by name only: declaration output omits their
 * types and parameter lists, so this file says nothing about their shape.
 */
export declare class AgentRuntime {
  private readonly messages;
  private readonly provider;
  private readonly toolRuntime;
  private readonly callbacks;
  private readonly contextManager;
  private activeRun;
  private readonly baseSystemPrompt;
  private readonly providerId;
  private readonly modelId;
  private readonly workingDirectory;
  private readonly explainEdits;
  private cancellationRequested;
  private lastToolCallSignature;
  private repeatedToolCallCount;
  private static readonly MAX_REPEATED_TOOL_CALLS;
  private totalContextRecoveries;
  private static readonly MAX_TOTAL_RECOVERIES;
  private recentToolCalls;
  private static readonly TOOL_HISTORY_SIZE;
  private static readonly BEHAVIORAL_LOOP_THRESHOLD;
  private static readonly EDIT_CONTEXT_CHAR_LIMIT;
  private static readonly NON_CACHEABLE_TOOL_NAMES;
  private static readonly LOOP_EXEMPT_TOOL_NAMES;
  private toolResultCache;
  private static readonly TOOL_CACHE_MAX_SIZE;
  private toolHistoryCursor;
  private modelInfo;
  private modelInfoFetched;

  constructor(options: AgentOptions);

  /**
   * Request cancellation of the current operation.
   * The agent will stop at the next safe point (after current tool completes).
   */
  requestCancellation(): void;

  /**
   * Check if cancellation has been requested.
   */
  isCancellationRequested(): boolean;

  /**
   * Check if the agent is currently processing a request.
   */
  isRunning(): boolean;

  /**
   * Check if any of the tool calls are edit operations (Edit, Write)
   */
  private isEditToolCall;

  /**
   * Extract a display-friendly file path from a tool call (prefers workspace-relative path)
   */
  private getEditedFilePath;

  /**
   * Get the file paths from edit tool calls for the explanation prompt
   */
  private getEditedFiles;

  /**
   * Send user text to the agent.
   *
   * @param text - The user input.
   * @param useStreaming - Optional flag; by its name it selects the streaming
   *   code path (see `processConversationStreaming`) - confirm in agent.ts.
   * @returns NOTE(review): the promise's type argument was missing from the
   *   declaration; `string` is assumed here - confirm against agent.ts.
   */
  send(text: string, useStreaming?: boolean): Promise<string>;

  private processConversation;
  private processConversationStreaming;

  /**
   * Execute tool calls with optimized concurrency
   *
   * PERF: Uses Promise.all for parallel execution with early result handling.
   * Results are collected in order but execution happens concurrently.
   * For very large batches (>10 tools), uses chunked execution to prevent
   * overwhelming system resources.
   */
  private resolveToolCalls;

  private truncateEditOutput;
  private buildEditExplanationPrompt;

  /**
   * Extract clean explanation from model output that may contain reasoning.
   * Reasoning models like deepseek-reasoner output chain-of-thought which we need to filter.
   */
  private extractCleanExplanation;

  private maybeExplainEdits;
  private get providerTools();

  /**
   * Whether to suppress tool narration in the content field.
   * Previously suppressed for OpenAI but now we show all thinking/narration.
   */
  private shouldSuppressToolNarration;

  private emitAssistantMessage;

  /**
   * Trigger verification for a final response if callback is registered
   * and response contains verifiable claims (implementation, build success, etc.)
   */
  private triggerVerificationIfNeeded;

  /**
   * Extract a "command hash" from tool arguments for behavioral loop detection.
   * For execute_bash, this is the actual command. For other tools, key identifying args.
   */
  private extractCmdHash;

  /**
   * Check for behavioral loops - model calling the same tool with similar args repeatedly.
   * Returns an error message if a loop is detected, null otherwise.
   *
   * FUNDAMENTAL PREVENTION: Cached calls are excluded from loop detection since they
   * don't actually execute (the cache provides the result). This means:
   * - First call: executes and caches result
   * - Second identical call: returns cached result, NOT counted toward loop
   * - Only genuinely NEW (non-cached) repetitive calls trigger loop detection
   *
   * Direct execution tools (bash/edit) are also exempt to avoid short-circuiting
   * legitimate repeated user commands.
   *
   * This catches patterns like:
   * - "git status -sb" called 3 times with DIFFERENT outputs (cache miss each time)
   * - Repeated file reads where file content changed
   * - Repeated searches with same pattern but new results
   */
  private checkBehavioralLoop;

  /**
   * Provide an acknowledgement before the first tool call when the model
   * hasn't narrated its plan. This keeps the UI responsive and lets the
   * user know work is happening even before tool output arrives.
   */
  private maybeAckToolCalls;

  /**
   * Reset behavioral loop tracking (called when user provides new input or task completes)
   */
  private resetBehavioralLoopTracking;

  /**
   * Create a stable cache key for a tool call based on name and arguments
   */
  private getToolCacheKey;

  /**
   * Only cache tools that are safe to reuse; stateful commands must always execute.
   */
  private isCacheableTool;

  /**
   * Direct execution tools should not trigger behavioral loop short-circuiting.
   */
  private shouldSkipLoopDetection;

  /**
   * Get cached result for a tool call, or null if not cached
   */
  private getCachedToolResult;

  /**
   * Cache a tool result for future identical calls
   */
  private cacheToolResult;

  /**
   * Drain the list of tools executed during the most recent send() call.
   * Used by higher-level orchestrators to reason about progress.
   */
  drainToolExecutions(): ToolExecutionRecord[];

  /** Return the conversation messages held by this agent. */
  getHistory(): ConversationMessage[];

  /** Load the given messages as the agent's conversation history. */
  loadHistory(history: ConversationMessage[]): void;

  /** Clear the agent's conversation history. */
  clearHistory(): void;

  /**
   * Prune messages if approaching context limit
   *
   * This runs BEFORE each generation to ensure we stay within budget.
   * If LLM summarization is available, it will create intelligent summaries
   * instead of just removing old messages.
   */
  private pruneMessagesIfNeeded;

  /**
   * Get current context statistics
   */
  private getContextStats;

  /**
   * Get context manager instance
   */
  getContextManager(): ContextManager | null;

  /**
   * Fetch model info from the provider API.
   * Returns context window and token limits from the real API.
   * Results are cached for the lifetime of this agent instance.
   *
   * NOTE(review): the promise's type argument was missing from the declaration;
   * it is reconstructed to match `getModelInfo()` - confirm against agent.ts.
   */
  fetchModelInfo(): Promise<ProviderModelInfo | null>;

  /**
   * Get cached model info (must call fetchModelInfo first)
   */
  getModelInfo(): ProviderModelInfo | null;

  /**
   * Get the context window size from the provider API.
   * Returns null if the provider doesn't support this or the API call fails.
   *
   * NOTE(review): the promise's type argument was missing from the declaration;
   * `number | null` is reconstructed from the description above - confirm
   * against agent.ts.
   */
  getContextWindowFromProvider(): Promise<number | null>;

  /**
   * Auto-recover from context overflow errors by aggressively pruning messages.
   *
   * This is called when an API call fails due to context length exceeding limits.
   * It performs increasingly aggressive pruning on each attempt:
   * - Attempt 1: Remove 30% of oldest messages + truncate tool outputs to 5k
   * - Attempt 2: Remove 50% of oldest messages + truncate tool outputs to 2k
   * - Attempt 3: Remove 70% of oldest messages + truncate tool outputs to 500 chars
   *
   * @returns true if recovery was successful (context was reduced)
   */
  private recoverFromContextOverflow;
}

export {};
//# sourceMappingURL=agent.d.ts.map