/**
 * Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
 */
import type {
  AssistantMessage,
  Context,
  FetchImpl,
  Model,
  ServiceTier,
  StopReason,
  StreamOptions,
  TextContent,
  ThinkingContent,
  Tool,
  ToolCall,
} from "../types";
import { AssistantMessageEventStream } from "../utils/event-stream";
import { normalizeSchemaForGoogle } from "../utils/schema";
import type {
  Content,
  FinishReason,
  FunctionCallingConfigMode,
  GenerateContentParameters,
  GenerateContentResponse,
  Part,
} from "./google-types";

export type {
  Content,
  FunctionCallingConfigMode,
  GenerateContentParameters,
  GenerateContentResponse,
  ThinkingConfig,
} from "./google-types";
export { normalizeSchemaForGoogle };

/** API identifiers of the Google-backed providers that share these helpers. */
type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";

/**
 * Thinking level for Gemini 3 models. Mirrors Google's `ThinkingLevel` enum values.
 * Defined here (not in any specific provider) so all Google providers can reference it
 * without inducing a circular dependency.
 */
export type GoogleThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH";

/**
 * Sampling/thinking options shared by `streamGoogle` and `streamGoogleVertex`.
 * `google-gemini-cli` uses a different transport and request shape — do not extend this for it.
 */
export interface GoogleSharedStreamOptions extends StreamOptions {
  /**
   * Tool selection mode. String forms map directly to Gemini
   * `FunctionCallingConfigMode`. The object form restricts the model to the
   * listed tool name(s) — `mode: "ANY"` is wire-required when
   * `allowedFunctionNames` is set, and the tuple type demands at least one name.
   */
  toolChoice?:
    | "auto"
    | "none"
    | "any"
    | {
        mode: "ANY";
        allowedFunctionNames: [string, ...string[]];
      };
  /**
   * Thinking configuration: an on/off switch plus an optional token budget
   * and/or a {@link GoogleThinkingLevel}.
   * NOTE(review): which of `budgetTokens` / `level` wins when both are set is
   * decided by the implementation — confirm there.
   */
  thinking?: {
    enabled: boolean;
    budgetTokens?: number;
    level?: GoogleThinkingLevel;
  };
  /** Gemini/Vertex serving tier (`flex`/`priority`); other values are omitted. */
  serviceTier?: ServiceTier;
  /**
   * Continues a Gemini Interactions API conversation from a stored interaction.
   * When set on the direct Google provider, the request uses `/interactions`
   * with `previous_interaction_id` instead of the legacy generateContent stream.
   */
  previousInteractionId?: string;
  /**
   * Uses the Gemini Interactions API for direct Google requests, storing the
   * returned interaction id on the assistant response for follow-up turns.
   */
  useInteractionsApi?: boolean;
  /** Overrides Interactions API request storage; default is the API default (`true`). */
  storeInteraction?: boolean;
}

/**
 * Determines whether a streamed Gemini `Part` should be treated as "thinking".
 *
 * Protocol note (Gemini / Vertex AI thought signatures):
 * - `thought: true` is the definitive marker for thinking content (thought summaries).
 * - `thoughtSignature` is an encrypted representation of the model's internal thought process
 *   used to preserve reasoning context across multi-turn interactions.
 * - `thoughtSignature` can appear on ANY part type (text, functionCall, etc.) - it does NOT
 *   indicate the part itself is thinking content.
 * - For non-functionCall responses, the signature appears on the last part for context replay.
 * - When persisting/replaying model outputs, signature-bearing parts must be preserved as-is;
 *   do not merge/move signatures across parts.
 *
 * See: https://ai.google.dev/gemini-api/docs/thought-signatures
 *
 * @param part - Only the `thought` / `thoughtSignature` fields of a `Part` are required.
 * @returns `true` when the part is thinking content.
 */
export declare function isThinkingPart(part: Pick<Part, "thought" | "thoughtSignature">): boolean;

/**
 * Retain thought signatures during streaming.
 *
 * Some backends only send `thoughtSignature` on the first delta for a given part/block; later
 * deltas may omit it. This helper preserves the last non-empty signature for the current block.
 *
 * Note: this does NOT merge or move signatures across distinct response parts. It only prevents
 * a signature from being overwritten with `undefined` within the same streamed block.
 *
 * @param existing - Signature already recorded for the current block, if any.
 * @param incoming - Signature carried by the newest delta, if any.
 * @returns The signature to keep for the block.
 */
export declare function retainThoughtSignature(
  existing: string | undefined,
  incoming: string | undefined,
): string | undefined;

/**
 * Convert internal messages to Gemini Content[] format.
 *
 * NOTE(review): the `Model<T>` type argument was reconstructed from the otherwise-unused
 * `GoogleApiType` alias — confirm against the implementation signature.
 */
export declare function convertMessages<T extends GoogleApiType>(model: Model<T>, context: Context): Content[];

/**
 * Convert tools to Gemini function declarations format.
 *
 * We prefer `parametersJsonSchema` (full JSON Schema: anyOf/oneOf/const/etc.).
 *
 * Claude models via Cloud Code Assist require the legacy `parameters` field; the API
 * translates it into Anthropic's `input_schema`. When using that path, we sanitize the
 * schema to remove Google-unsupported JSON Schema keywords.
 *
 * NOTE(review): the `Record<string, unknown>` element type was reconstructed — confirm
 * against the implementation signature.
 */
export declare function convertTools(
  tools: Tool[],
  model: Model<"google-generative-ai" | "google-gemini-cli" | "google-vertex">,
):
  | {
      functionDeclarations: Record<string, unknown>[];
    }[]
  | undefined;

/**
 * Map tool choice string to Gemini FunctionCallingConfigMode.
 */
export declare function mapToolChoice(choice: string): FunctionCallingConfigMode;

/**
 * Map Gemini FinishReason to our StopReason.
 */
export declare function mapStopReason(reason: FinishReason): StopReason;

/**
 * Map string finish reason to our StopReason (for raw API responses).
 */
export declare function mapStopReasonString(reason: string): StopReason;

/**
 * Bounded retries for the well-known Gemini "empty response" failure: a benign
 * `finishReason: STOP` carrying only an empty/whitespace text part and no tool call.
 * Shared by the public/Vertex `streamGoogleGenAI` path and the Cloud Code Assist
 * (`google-gemini-cli`/`google-antigravity`) provider so both apply the same policy.
 */
export declare const MAX_EMPTY_STREAM_RETRIES = 2;

/**
 * Base delay, in milliseconds, associated with the empty-response retries above.
 * NOTE(review): how the delay scales per attempt is defined by the implementation — confirm there.
 */
export declare const EMPTY_STREAM_BASE_DELAY_MS = 500;

/**
 * Whether a completed Google assistant message carries content worth delivering.
 *
 * A tool call or any non-whitespace text counts as meaningful. An empty/whitespace-only
 * text part — or thinking that never produced an answer — is the "empty response" failure:
 * delivered as-is the agent loop has nothing to act on and silently halts, so the request
 * must be retried instead of surfaced.
 */
export declare function hasMeaningfulGoogleContent(output: AssistantMessage): boolean;

/**
 * Produce the id to assign to a tool call for the function named `name`.
 * NOTE(review): id format and uniqueness guarantees live in the implementation —
 * confirm before relying on them.
 */
export declare function nextToolCallId(name: string): string;

/**
 * Push the appropriate `text_end` / `thinking_end` event for the given block.
 * Shared between the SDK-backed stream consumer and the gemini-cli SSE consumer so
 * the end-of-block event shape stays in lockstep.
 */
export declare function pushBlockEndEvent(
  block: TextContent | ThinkingContent,
  contentIndex: number,
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void;

/**
 * Push the three lifecycle events (`toolcall_start` / `toolcall_delta` / `toolcall_end`) for a
 * fully-assembled `ToolCall`. Caller is responsible for appending the toolCall to `output.content`
 * before invoking — this helper does not mutate `output.content`.
 */
export declare function pushToolCallEvents(
  toolCall: ToolCall,
  contentIndex: number,
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void;

/**
 * Append a new text- or thinking-block to `output.content` and push the matching
 * `text_start` / `thinking_start` event. `onBeforeStartEvent` lets the SSE consumer
 * inject its `ensureStarted()` first-token side effect into the canonical event order.
 *
 * The overloads narrow the return type when `isThinking` is a literal `true`/`false`.
 */
export declare function startTextOrThinkingBlock(
  isThinking: true,
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
  onBeforeStartEvent?: () => void,
): ThinkingContent;
export declare function startTextOrThinkingBlock(
  isThinking: false,
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
  onBeforeStartEvent?: () => void,
): TextContent;
export declare function startTextOrThinkingBlock(
  isThinking: boolean,
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
  onBeforeStartEvent?: () => void,
): TextContent | ThinkingContent;

/**
 * Drives the chunked `generateContentStream` iterator into an `AssistantMessage` and
 * the corresponding `AssistantMessageEventStream`. Shared between `streamGoogle` and
 * `streamGoogleVertex` — every observable event order and stop-reason rule is preserved.
 *
 * The caller still owns: `output` construction, timing fields (`duration`/`ttft`),
 * `rawRequestDump`, the `client.models.generateContentStream(params)` call itself,
 * pushing `start`/`done`/`error` events, and the surrounding try/catch that translates
 * thrown errors into `output.stopReason`/`errorMessage`.
 *
 * This helper handles: the chunk loop, currentBlock flush transitions, usage metadata
 * decoding (`calculateCost` included), tool-call id collision avoidance, finish-reason
 * mapping, and the abort/stop-reason post-checks that re-throw to bubble into the
 * caller's catch.
 *
 * NOTE(review): the `Model<T>` argument and the `Promise<void>` result type were
 * reconstructed — confirm against the implementation signature.
 */
export declare function consumeGoogleStream<T extends GoogleApiType>(args: {
  googleStream: AsyncIterable<GenerateContentResponse>;
  output: AssistantMessage;
  stream: AssistantMessageEventStream;
  model: Model<T>;
  options:
    | {
        signal?: AbortSignal;
      }
    | undefined;
  /** Vertex preserves `textSignature` on streamed text deltas; google-generative-ai does not. */
  retainTextSignature?: boolean;
  onFirstToken?: () => void;
}): Promise<void>;

/**
 * Build the `GenerateContentParameters` payload for the public Gemini API and Vertex AI.
 * Both surfaces accept the same `GenerateContentConfig` shape — every numeric/string knob,
 * tool-config, thinking-config, and system-instruction conversion is identical.
 *
 * `google-gemini-cli` is NOT routed through here: its `CloudCodeAssistRequest` body has a
 * distinct top-level shape (project/request/requestType) and a different thinking-config
 * placement on `generationConfig`.
 *
 * NOTE(review): the `Model<T>` type argument was reconstructed — confirm against the
 * implementation signature.
 */
export declare function buildGoogleGenerateContentParams<T extends GoogleApiType>(
  model: Model<T>,
  context: Context,
  options: GoogleSharedStreamOptions,
): GenerateContentParameters;

/**
 * Request plan returned by the `prepare()` callback of {@link streamGoogleGenAI}:
 * the request payload plus where and how to send it.
 */
export interface GoogleGenAIRequestPlan {
  params: GenerateContentParameters;
  url: string;
  /** NOTE(review): value type reconstructed as `string` — confirm against the implementation. */
  headers: Record<string, string>;
  fetch?: FetchImpl;
  /** Optional URL retried once when {@link url} returns 404 (regional Vertex endpoint missing a global-only model). */
  fallbackUrl?: string;
}

/**
 * Drive the `streamGoogle` / `streamGoogleVertex` event flow: build the assistant message,
 * push start/done/error events, run `consumeGoogleStream`, and translate thrown errors into
 * the canonical `error` event shape.
 *
 * Caller-supplied `prepare()` runs inside the try-block so any failure (missing project,
 * bad auth, etc.) is funneled through the same error path as a streaming failure.
 * `prepare()` may return its plan synchronously or as a promise.
 */
export declare function streamGoogleGenAI<T extends GoogleApiType>(args: {
  model: Model<T>;
  options: GoogleSharedStreamOptions | undefined;
  api: T;
  retainTextSignature?: boolean;
  prepare: () => GoogleGenAIRequestPlan | Promise<GoogleGenAIRequestPlan>;
}): AssistantMessageEventStream;