/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import {
  GenerateContentResponse,
  type Content,
  type GenerateContentConfig,
  type SendMessageParameters,
  type Part,
  GenerateContentResponseUsageMetadata,
  type Tool,
  type PartListUnion,
} from '@google/genai';
import type { CompletedToolCall } from './coreToolScheduler.js';
import { type ContentGenerator } from './contentGenerator.js';
import { HistoryService } from '../services/history/HistoryService.js';
import type { IContent, ContentBlock } from '../services/history/IContent.js';
import type {
  AgentRuntimeContext,
  ToolRegistryView,
} from '../runtime/AgentRuntimeContext.js';
/**
 * Discriminant for the events yielded while streaming a model response.
 */
export declare enum StreamEventType {
  /** A regular content chunk from the API. */
  CHUNK = "chunk",
  /**
   * A signal that a retry is about to happen. The UI should discard any
   * partial content from the attempt that just failed.
   */
  RETRY = "retry"
}
/**
 * Event emitted by the streaming API: either a content chunk carrying a
 * {@link GenerateContentResponse}, or a bare retry signal.
 */
export type StreamEvent =
  | {
      type: StreamEventType.CHUNK;
      value: GenerateContentResponse;
    }
  | {
      type: StreamEventType.RETRY;
    };
/**
 * Aggregates text from content blocks while preserving spacing around non-text blocks.
 * When thinking blocks (or other non-text blocks) appear between text chunks, this ensures
 * proper spacing is maintained in the aggregated output.
 *
 * @param blocks - Array of content blocks to process
 * @param currentText - The current accumulated text
 * @param lastBlockWasNonText - Whether the previous block was a non-text block
 * @returns Object containing the aggregated text and the updated non-text flag
 */
export declare function aggregateTextWithSpacing(
  blocks: ContentBlock[],
  currentText: string,
  lastBlockWasNonText: boolean,
): {
  text: string;
  lastBlockWasNonText: boolean;
};
/**
 * Checks if a part contains valid non-thought text content.
 * This helps in consolidating text parts properly during stream processing.
*/ export declare function isValidNonThoughtTextPart(part: Part): boolean; /** * Custom error to signal that a stream completed with invalid content, * which should trigger a retry. */ export declare class InvalidStreamError extends Error { readonly type: 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT' | 'MALFORMED_FUNCTION_CALL'; constructor(message: string, type: 'NO_FINISH_REASON' | 'NO_RESPONSE_TEXT' | 'NO_FINISH_REASON_NO_TEXT' | 'MALFORMED_FUNCTION_CALL'); } /** * Legacy error class for backward compatibility. */ export declare class EmptyStreamError extends Error { constructor(message: string); } /** * Chat session that enables sending messages to the model with previous * conversation context. * * @remarks * The session maintains all the turns between user and model. */ export declare class GeminiChat { private static readonly TOKEN_SAFETY_MARGIN; private static readonly DEFAULT_COMPLETION_BUDGET; private sendPromise; private compressionPromise; private logger; private lastPromptTokenCount; /** * Tracks consecutive compression failures for cooldown logic. * * @plan PLAN-20260218-COMPRESSION-RETRY.P01 * @requirement REQ-CR-005 */ private compressionFailureCount; /** * Timestamp (ms) of the most recent compression failure, or null if none. * * @plan PLAN-20260218-COMPRESSION-RETRY.P01 * @requirement REQ-CR-005 */ private lastCompressionFailureTime; /** Cooldown period in ms — skip compression when 3+ failures within this window. */ private static readonly COMPRESSION_COOLDOWN_MS; /** Number of consecutive failures before entering cooldown. */ private static readonly COMPRESSION_FAILURE_THRESHOLD; /** * Optional callback that supplies formatted active todo items for compression. * Set by the owning client so the compression context can include todo awareness * without GeminiChat depending on the todo system directly. 
*/ private activeTodosProvider?; /** * Density dirty flag — tracks whether new content has been added since last optimization. * @plan PLAN-20260211-HIGHDENSITY.P20 * @requirement REQ-HD-002.6, REQ-HD-002.7 */ private densityDirty; /** * Suppresses densityDirty from being set during compression rebuilds. * @plan PLAN-20260211-HIGHDENSITY.P20 * @requirement REQ-HD-002.6 */ private _suppressDensityDirty; private readonly generationConfig; /** * Runtime state for stateless operation (Phase 6) * @plan PLAN-20251028-STATELESS6.P10 * @requirement REQ-STAT6-001.2 * @pseudocode agent-runtime-context.md lines 83-91 (step 006) */ private readonly runtimeState; private readonly historyService; private readonly runtimeContext; /** * Gets the last prompt token count. */ getLastPromptTokenCount(): number; /** * @plan PLAN-20251028-STATELESS6.P10 * @requirement REQ-STAT6-001.2, REQ-STAT6-002.2, REQ-STAT6-002.3 * @pseudocode agent-runtime-context.md lines 83-91 (step 006.1-006.2) * * Phase 6 constructor: Accept AgentRuntimeContext as first parameter * Eliminates Config dependency by using runtime view adapters */ constructor(view: AgentRuntimeContext, contentGenerator: ContentGenerator, generationConfig?: GenerateContentConfig, initialHistory?: Content[]); /** * Create a position-based matcher for Gemini tool responses. * It returns the next unmatched tool call from the current history. 
*/ private makePositionMatcher; private _getRequestTextFromContents; private buildProviderRuntime; private extractDirectGeminiOverrides; /** * @plan PLAN-20251028-STATELESS6.P10 * @requirement REQ-STAT6-002.3 * @pseudocode agent-runtime-context.md line 88 (step 006.5) */ private _logApiRequest; /** * @plan PLAN-20251028-STATELESS6.P10 * @requirement REQ-STAT6-002.3 * @pseudocode agent-runtime-context.md line 88 (step 006.5) */ private _logApiResponse; /** * @plan PLAN-20251028-STATELESS6.P10 * @requirement REQ-STAT6-002.3 * @pseudocode agent-runtime-context.md line 88 (step 006.5) */ private _logApiError; setSystemInstruction(sysInstr: string): void; /** * Get the underlying HistoryService instance * @returns The HistoryService managing conversation history */ getHistoryService(): HistoryService; /** * Wait until any in-flight send/stream has completed and history has been committed. * This is used by provider switching to avoid capturing partial turns. */ waitForIdle(): Promise; getToolsView(): ToolRegistryView; /** * Sends a message to the model and returns the response. * * @remarks * This method will wait for the previous message to be processed before * sending the next message. * * @see {@link Chat#sendMessageStream} for streaming method. * @param params - parameters for sending messages within a chat session. * @returns The model's response. * * @example * ```ts * const chat = ai.chats.create({model: 'gemini-2.0-flash'}); * const response = await chat.sendMessage({ * message: 'Why is the sky blue?' * }); * console.log(response.text); * ``` */ sendMessage(params: SendMessageParameters, prompt_id: string): Promise; /** * Sends a message to the model and returns the response in chunks. * * @remarks * This method will wait for the previous message to be processed before * sending the next message. * * @see {@link Chat#sendMessage} for non-streaming method. * @param params - parameters for sending the message. * @return The model's response. 
* * @example * ```ts * const chat = ai.chats.create({model: 'gemini-2.0-flash'}); * const response = await chat.sendMessageStream({ * message: 'Why is the sky blue?' * }); * for await (const chunk of response) { * console.log(chunk.text); * } * ``` */ sendMessageStream(params: SendMessageParameters, prompt_id: string): Promise>; generateDirectMessage(params: SendMessageParameters, prompt_id: string): Promise; private makeApiCallAndProcessStream; /** * Returns the chat history. * * @remarks * The history is a list of contents alternating between user and model. * * There are two types of history: * - The `curated history` contains only the valid turns between user and * model, which will be included in the subsequent requests sent to the model. * - The `comprehensive history` contains all turns, including invalid or * empty model outputs, providing a complete record of the history. * * The history is updated after receiving the response from the model, * for streaming response, it means receiving the last chunk of the response. * * The `comprehensive history` is returned by default. To get the `curated * history`, set the `curated` parameter to `true`. * * @param curated - whether to return the curated history or the comprehensive * history. * @return History contents alternating between user and model for the entire * chat session. */ getHistory(curated?: boolean): Content[]; /** * Clears the chat history. */ clearHistory(): void; /** * Adds a new entry to the chat history. */ addHistory(content: Content): void; setHistory(history: Content[]): void; setTools(tools: Tool[]): void; clearTools(): void; /** * Register a callback that provides formatted active todo items. * Called during compression to supply todo context to the summarizer. */ setActiveTodosProvider(provider: () => Promise): void; /** * Calculate effective token count based on reasoning settings. * This accounts for whether reasoning will be included in API calls. 
* * @plan PLAN-20251202-THINKING.P15 * @requirement REQ-THINK-005.1, REQ-THINK-005.2 */ private getEffectiveTokenCount; /** * Run density optimization if the active strategy supports it and new content exists. * Called before the threshold check in ensureCompressionBeforeSend and enforceContextWindow. * * @plan PLAN-20260211-HIGHDENSITY.P20 * @requirement REQ-HD-002.1, REQ-HD-002.2, REQ-HD-002.3, REQ-HD-002.4, REQ-HD-002.5, REQ-HD-002.7, REQ-HD-002.9 * @pseudocode orchestration.md lines 50-99 */ private ensureDensityOptimized; /** * Check if compression is needed based on token count. * * Token calculation includes system prompt in both paths: * 1. When lastPromptTokenCount (actual API data) is available - it already includes * the system prompt as part of the request sent to the API * 2. When falling back to getEffectiveTokenCount() - it uses historyService.getTotalTokens() * which adds baseTokenOffset (system prompt tokens) to history tokens * * NOTE: System prompt is NEVER compressed - it is static and critical. Only conversation * history is subject to compression via the configured compression strategy. * * @plan PLAN-20251028-STATELESS6.P10 * @requirement REQ-STAT6-002.2 * @pseudocode agent-runtime-context.md line 86 (step 006.3) */ private shouldCompress; private ensureCompressionBeforeSend; private estimatePendingTokens; private asNumber; private extractCompletionBudgetFromParams; private getCompletionBudget; private enforceContextWindow; /** * Perform compression of chat history with retry, fallback, and cooldown. * * - Transient errors (429, 5xx, network) are retried up to 3 times with backoff. * - After exhausting retries, falls back to TopDownTruncationStrategy (no LLM). * - After 3 consecutive failures within the 60-second cooldown window, * compression is skipped entirely to avoid blocking the conversation. * - Successful compression resets the failure counters. 
* * @plan PLAN-20260211-COMPRESSION.P14 * @plan PLAN-20260218-COMPRESSION-RETRY.P01 * @requirement REQ-CS-006.1, REQ-CS-002.9, REQ-CR-003, REQ-CR-004, REQ-CR-005 */ performCompression(prompt_id: string): Promise; /** * Returns true if compression should be skipped due to repeated recent failures. * * @plan PLAN-20260218-COMPRESSION-RETRY.P01 * @requirement REQ-CR-005 */ private isCompressionInCooldown; /** * Execute compression using the primary strategy with retry for transient * errors, falling back to TopDownTruncationStrategy if all transient retries fail. * Permanent errors are re-thrown immediately without attempting a fallback. * * @plan PLAN-20260218-COMPRESSION-RETRY.P01 * @requirement REQ-CR-003, REQ-CR-004, REQ-CR-005 */ private runCompressionWithRetryAndFallback; /** * Attempt compression using TopDownTruncationStrategy as a fallback. * This strategy never requires an LLM call and always succeeds if there is * any history at all. If it also fails, we log and continue without * compressing to avoid blocking the conversation. * * @plan PLAN-20260218-COMPRESSION-RETRY.P01 * @requirement REQ-CR-004, REQ-CR-005 */ private performFallbackCompression; /** * Build the {@link CompressionContext} that strategies receive. * Keeps historyService out of the strategy boundary. * * @plan PLAN-20260211-COMPRESSION.P14 * @requirement REQ-CS-001.6 */ private buildCompressionContext; getFinalUsageMetadata(chunks: GenerateContentResponse[]): GenerateContentResponseUsageMetadata | undefined; private processStreamResponse; /** * Records completed tool calls with full metadata. * This is called by external components when tool calls complete, before sending responses to Gemini. 
*/ recordCompletedToolCalls(_model: string, _toolCalls: CompletedToolCall[]): void; private recordHistory; private hasTextContent; private maybeIncludeSchemaDepthContext; /** * Convert PartListUnion (user input) to IContent format for provider/history */ convertPartListUnionToIContent(input: PartListUnion): IContent; /** * Convert IContent (from provider) to GenerateContentResponse for SDK compatibility */ convertIContentToResponse(input: IContent): GenerateContentResponse; /** * Get the active provider from the ProviderManager via Config */ private getActiveProvider; private resolveProviderForRuntime; /** * Check if a provider supports the IContent interface */ private providerSupportsIContent; private resolveProviderBaseUrl; } /** Visible for Testing */ export declare function isSchemaDepthError(errorMessage: string): boolean;