import {
    type AuthToken,
    type CreateAuthTokenParameters,
    type LiveServerMessage,
    type LiveConnectConfig,
    type FunctionDeclaration,
    type FunctionResponse,
    type Content,
    type Blob as GeminiBlob,
} from '@google/genai';
import {
    BaseRealtimeModel,
    type ClientRealtimeSessionConfig,
    type IRealtimeSession,
    type RealtimeSessionParams,
    type RealtimeToolDefinition,
} from '@memberjunction/ai';

/**
 * The minimal subset of `@google/genai`'s `Session` that the realtime driver depends on. Declaring
 * the seam as an interface (rather than the concrete SDK `Session`) lets unit tests inject a fully
 * in-memory fake that drives the registered callbacks with Gemini-shaped messages and captures
 * outbound calls — no websocket, no network.
 */
export interface GeminiLiveSession {
    /**
     * Streams realtime user input to the model — media frames (audio now) AND mid-session
     * text. Realtime text is the Live API's "respond now" path for in-conversation messages:
     * native-audio models treat {@link sendClientContent} as history seeding only and will
     * NOT generate from it mid-call, while realtime text triggers immediately.
     */
    sendRealtimeInput(params: {
        audio?: GeminiBlob;
        media?: GeminiBlob;
        text?: string;
    }): void;

    /** Appends client content (used to seed initial context) to the conversation. */
    sendClientContent(params: {
        turns?: Content[];
        turnComplete?: boolean;
    }): void;

    /** Replies to a server tool call with one or more function responses. */
    sendToolResponse(params: {
        functionResponses: FunctionResponse[] | FunctionResponse;
    }): void;

    /** Terminates the underlying connection. */
    close(): void;
}

/**
 * Concrete arguments handed to {@link GeminiRealtime.connectLiveSession}. Bundles the resolved
 * connect config (system instruction, tools, modalities, transcription) and the message callback so
 * the seam owns the entire `ai.live.connect` call and tests can substitute it wholesale.
 */
export interface GeminiConnectArgs {
    /** The Gemini Live model id to open the session against. */
    Model: string;

    /** The fully-built connect config (system instruction, tools, modalities, transcription, plus the open config bag). */
    Config: LiveConnectConfig;

    /** Invoked for every {@link LiveServerMessage} the server emits over the session. */
    OnMessage: (message: LiveServerMessage) => void;

    /** Invoked on a websocket-level error (fatal — the session is unusable). Optional. */
    OnError?: (event: ErrorEvent) => void;

    /** Invoked when the websocket closes. Optional. */
    OnClose?: (event: CloseEvent) => void;
}

/**
 * Real-time, full-duplex driver for Google's **Gemini Live API**, implementing the Core
 * {@link BaseRealtimeModel} primitive.
 *
 * The driver opens a bidirectional Gemini Live session, streams client audio in, and translates the
 * provider's {@link LiveServerMessage} frames into the modality-agnostic Core events
 * ({@link RealtimeTranscript}, {@link RealtimeToolCall}, {@link RealtimeUsage}, output media, and
 * interruption). It registers via the MemberJunction class factory as `GeminiRealtime` and is
 * resolved for `MJ: AI Models` typed `Realtime`.
 *
 * **Testability:** the live-session creation is isolated behind the overridable
 * {@link connectLiveSession} seam, so unit tests inject a fake {@link GeminiLiveSession} and exercise
 * the full message→event translation with no network.
 */
export declare class GeminiRealtime extends BaseRealtimeModel {
    private geminiClient;
    private geminiTokenClient;

    constructor(apiKey: string);

    /**
     * Opens a Gemini Live session and returns the Core session handle that translates between the
     * provider's frames and the MemberJunction realtime contract.
     *
     * @param params Session configuration (model, system prompt, tools, config bag).
     * @returns The Core {@link IRealtimeSession} handle for the opened session.
     */
    StartSession(params: RealtimeSessionParams): Promise<IRealtimeSession>;

    /**
     * Gemini Live supports the client-direct topology: the server mints a short-lived ephemeral
     * auth token (`v1alpha` `auth_tokens` API) that the browser uses to open its OWN Live
     * websocket, while the server keeps prompt/tool authority by LOCKING the connect config into
     * the token via `liveConnectConstraints`.
     */
    get SupportsClientDirect(): boolean;

    /**
     * Mints an ephemeral, server-scoped Live credential for a **client-direct** session.
     *
     * The connect config is built EXACTLY as {@link StartSession} builds it (same
     * {@link buildConnectConfig}: audio modality, input+output transcription, system instruction,
     * mapped tools) and is **locked into the token** via `liveConnectConstraints` +
     * `lockAdditionalFields: []` — so the API ignores any attempt by the browser to change the
     * locked fields. The same config is ALSO carried in `SessionConfig` (as `{ model, config }`)
     * because the SDK still expects the client to pass a model/config at `live.connect` time; the
     * token-side lock is what makes the server's prompt and tool set authoritative.
     *
     * NOTE(review): the documentation on {@link BuildConstraintConfig} states that
     * `systemInstruction`, `tools`, and the transcription configs are NOT accepted in
     * `liveConnectConstraints.config`. Confirm which fields actually end up locked in the token
     * and reconcile the two descriptions.
     *
     * Expiry: the browser must open its session within
     * {@link GEMINI_CLIENT_TOKEN_NEW_SESSION_WINDOW_MS}; the token (and thus the session's
     * ability to send messages) dies at {@link GEMINI_CLIENT_TOKEN_EXPIRY_MS}.
     *
     * @param params Session configuration (model, system prompt, tools, config bag).
     * @returns The minted {@link ClientRealtimeSessionConfig} the browser authenticates + applies.
     */
    CreateClientSession(params: RealtimeSessionParams): Promise<ClientRealtimeSessionConfig>;

    /**
     * Mint seam for the ephemeral auth token. Production routes through the SDK's
     * `authTokens.create` on a `v1alpha` client (ephemeral tokens are v1alpha-only); unit tests
     * override this to return a fake token with no network.
     *
     * @param params The auth-token create parameters (expiry, uses, live-connect constraints).
     * @returns The created {@link AuthToken} (its `name` is the credential the browser presents).
     */
    protected mintAuthToken(params: CreateAuthTokenParameters): Promise<AuthToken>;

    /**
     * Lazily constructs the `v1alpha` `GoogleGenAI` client used ONLY for auth-token minting
     * (the ephemeral-token API is exposed on `v1alpha`; the regular live client stays default).
     */
    private ensureTokenClient;

    /**
     * Creation seam for the underlying Gemini Live session.
     *
     * Production code routes through `ai.live.connect`; unit tests override this method to inject a
     * fake {@link GeminiLiveSession}. Kept as a thin, single-responsibility method so the network
     * boundary is the *only* thing tests need to replace.
     *
     * @param args Resolved model, connect config, and the server-message callback.
     * @returns A promise resolving to the live session handle.
     */
    protected connectLiveSession(args: GeminiConnectArgs): Promise<GeminiLiveSession>;

    /**
     * Lazily constructs the `GoogleGenAI` client from the driver's API key.
     */
    private ensureClient;

    /**
     * Projects the full connect config down to the fields Gemini's ephemeral-token API accepts
     * as `liveConnectConstraints.config`. The token mint converts the provided keys into a
     * field mask over `BidiGenerateContentSetup`, and only generation-level fields are valid
     * there — `systemInstruction`, `tools`, and the transcription configs are NOT, and their
     * presence 400s the entire mint. Only defined fields are copied (an absent key must stay
     * absent so it doesn't enter the mask).
     *
     * @param config The full connect config to project.
     * @returns A config containing only the fields accepted as token constraints.
     */
    static BuildConstraintConfig(config: LiveConnectConfig): LiveConnectConfig;

    /**
     * Builds the {@link LiveConnectConfig} from the Core session params: audio response modality,
     * input/output transcription, system instruction, mapped tools, plus any provider-specific
     * overrides from the open config bag.
     */
    private buildConnectConfig;

    /**
     * Maps Core {@link RealtimeToolDefinition}s to Gemini {@link FunctionDeclaration}s.
     *
     * The Core `ParametersSchema` is a JSON-schema object, so it rides in `parametersJsonSchema`
     * (the SDK's JSON-schema slot) rather than the OpenAPI-style `parameters` slot.
     *
     * @param tools The Core tool definitions to convert.
     * @returns One Gemini function declaration per tool definition.
     */
    static MapToolsToFunctionDeclarations(tools: RealtimeToolDefinition[]): FunctionDeclaration[];
}
//# sourceMappingURL=geminiRealtime.d.ts.map