// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. import { APIResource } from '../../core/resource'; import * as MessagesAPI from './messages'; import * as ToolsAPI from '../tools'; import * as AgentsAPI from './agents'; import * as ConversationsMessagesAPI from '../conversations/messages'; import * as RunsAPI from '../runs/runs'; import { APIPromise } from '../../core/api-promise'; import { ArrayPage, type ArrayPageParams, PagePromise } from '../../core/pagination'; import { Stream } from '../../core/streaming'; import { RequestOptions } from '../../internal/request-options'; import { path } from '../../internal/utils/path'; export class Messages extends APIResource { /** * Process a user message and return the agent's response. This endpoint accepts a * message from a user and processes it through the agent. * * **Note:** Sending multiple concurrent requests to the same agent can lead to * undefined behavior. Each agent processes messages sequentially, and concurrent * requests may interleave in unexpected ways. Wait for each request to complete * before sending the next one. Use separate agents or conversations for parallel * processing. 
* * The response format is controlled by the `streaming` field in the request body: * * - If `streaming=false` (default): Returns a complete LettaResponse with all * messages * - If `streaming=true`: Returns a Server-Sent Events (SSE) stream * * Additional streaming options (only used when streaming=true): * * - `stream_tokens`: Stream individual tokens instead of complete steps * - `include_pings`: Include keepalive pings to prevent connection timeouts * - `background`: Process the request in the background */ create( agentID: string, body: MessageCreateParamsNonStreaming, options?: RequestOptions, ): APIPromise; create( agentID: string, body: MessageCreateParamsStreaming, options?: RequestOptions, ): APIPromise>; create( agentID: string, body: MessageCreateParamsBase, options?: RequestOptions, ): APIPromise | LettaResponse>; create( agentID: string, body: MessageCreateParams, options?: RequestOptions, ): APIPromise | APIPromise> { return this._client.post(path`/v1/agents/${agentID}/messages`, { body, ...options, stream: body.streaming ?? false, }) as APIPromise | APIPromise>; } /** * Retrieve message history for an agent. */ list( agentID: string, query: MessageListParams | null | undefined = {}, options?: RequestOptions, ): PagePromise { return this._client.getAPIList(path`/v1/agents/${agentID}/messages`, ArrayPage, { query, ...options, }); } /** * Cancel runs associated with an agent. If run_ids are passed in, cancel those in * particular. * * Note: to cancel active runs associated with an agent, Redis is required. */ cancel( agentID: string, body: MessageCancelParams | null | undefined = {}, options?: RequestOptions, ): APIPromise { return this._client.post(path`/v1/agents/${agentID}/messages/cancel`, { body, ...options }); } /** * Summarize an agent's conversation history. 
*/ compact( agentID: string, body: MessageCompactParams | null | undefined = {}, options?: RequestOptions, ): APIPromise { return this._client.post(path`/v1/agents/${agentID}/summarize`, { body, ...options }); } /** * Asynchronously process a user message and return a run object. The actual * processing happens in the background, and the status can be checked using the * run ID. * * This is "asynchronous" in the sense that it's a background run and explicitly * must be fetched by the run ID. * * **Note:** Sending multiple concurrent requests to the same agent can lead to * undefined behavior. Each agent processes messages sequentially, and concurrent * requests may interleave in unexpected ways. Wait for each request to complete * before sending the next one. Use separate agents or conversations for parallel * processing. */ createAsync(agentID: string, body: MessageCreateAsyncParams, options?: RequestOptions): APIPromise { return this._client.post(path`/v1/agents/${agentID}/messages/async`, { body, ...options }); } /** * Resets the messages for an agent */ reset( agentID: string, body: MessageResetParams, options?: RequestOptions, ): APIPromise { return this._client.patch(path`/v1/agents/${agentID}/reset-messages`, { body, ...options }); } /** * Process a user message and return the agent's response. * * Deprecated: Use the `POST /{agent_id}/messages` endpoint with `streaming=true` * in the request body instead. * * **Note:** Sending multiple concurrent requests to the same agent can lead to * undefined behavior. Each agent processes messages sequentially, and concurrent * requests may interleave in unexpected ways. Wait for each request to complete * before sending the next one. Use separate agents or conversations for parallel * processing. * * This endpoint accepts a message from a user and processes it through the agent. * It will stream the steps of the response always, and stream the tokens if * 'stream_tokens' is set to True. 
* * @deprecated */ stream( agentID: string, body: MessageStreamParams, options?: RequestOptions, ): APIPromise> { return this._client.post(path`/v1/agents/${agentID}/messages/stream`, { body, ...options, stream: true, }) as APIPromise>; } } export type MessagesArrayPage = ArrayPage; export type RunsArrayPage = ArrayPage; /** * Input to approve or deny a tool call request */ export interface ApprovalCreate { /** * @deprecated The message ID of the approval request */ approval_request_id?: string | null; /** * The list of approval responses */ approvals?: Array | null; /** * @deprecated Whether the tool has been approved */ approve?: boolean | null; /** * The multi-agent group that the message was sent in */ group_id?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; /** * @deprecated An optional explanation for the provided approval status */ reason?: string | null; /** * The message type to be created. */ type?: 'approval'; } /** * A message representing a request for approval to call a tool (generated by the * LLM to trigger tool execution). * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message tool_call (ToolCall): The tool call */ export interface ApprovalRequestMessage { id: string; date: string; /** * @deprecated The tool call that has been requested by the llm to run */ tool_call: ToolCall | ToolCallDelta; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'approval_request_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. 
Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; /** * The tool calls that have been requested by the llm to run, which are pending * approval */ tool_calls?: Array | ToolCallDelta | null; } /** * A message representing a response from the user indicating whether a tool has * been approved to run. * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message approve: (bool) Whether the tool has been approved approval_request_id: * The ID of the approval request reason: (Optional[str]) An optional explanation * for the provided approval status */ export interface ApprovalResponseMessage { id: string; date: string; /** * @deprecated The message ID of the approval request */ approval_request_id?: string | null; /** * The list of approval responses */ approvals?: Array | null; /** * @deprecated Whether the tool has been approved */ approve?: boolean | null; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'approval_response_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. 
*/ otid?: string | null; /** * @deprecated An optional explanation for the provided approval status */ reason?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } export interface ApprovalReturn { /** * Whether the tool has been approved */ approve: boolean; /** * The ID of the tool call that corresponds to this approval */ tool_call_id: string; /** * An optional explanation for the provided approval status */ reason?: string | null; /** * The message type to be created. */ type?: 'approval'; } /** * A message sent by the LLM in response to user input. Used in the LLM context. * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message content (Union[str, List[LettaAssistantMessageContentUnion]]): The * message content sent by the agent (can be a string or an array of content parts) */ export interface AssistantMessage { id: string; /** * The message content sent by the agent (can be a string or an array of content * parts) */ content: Array | string; date: string; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'assistant_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } /** * A message for notifying the developer that an event has occurred (e.g. a * compaction). Events are NOT part of the context window. 
*/ export interface EventMessage { id: string; date: string; event_data: { [key: string]: unknown }; event_type: 'compaction'; is_err?: boolean | null; message_type?: 'event_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } /** * Representation of an agent's internal reasoning where reasoning content has been * hidden from the response. * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message state (Literal["redacted", "omitted"]): Whether the reasoning content * was redacted by the provider or simply omitted by the API hidden_reasoning * (Optional[str]): The internal reasoning of the agent */ export interface HiddenReasoningMessage { id: string; date: string; state: 'redacted' | 'omitted'; hidden_reasoning?: string | null; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'hidden_reasoning_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } export interface ImageContent { /** * The source of the image. */ source: ImageContent.URLImage | ImageContent.Base64Image | ImageContent.LettaImage; /** * The type of the message. */ type?: 'image'; } export namespace ImageContent { export interface URLImage { /** * The URL of the image. 
*/ url: string; /** * The source type for the image. */ type?: 'url'; } export interface Base64Image { /** * The base64 encoded image data. */ data: string; /** * The media type for the image. */ media_type: string; /** * What level of detail to use when processing and understanding the image (low, * high, or auto to let the model decide) */ detail?: string | null; /** * The source type for the image. */ type?: 'base64'; } export interface LettaImage { /** * The unique identifier of the image file persisted in storage. */ file_id: string; /** * The base64 encoded image data. */ data?: string | null; /** * What level of detail to use when processing and understanding the image (low, * high, or auto to let the model decide) */ detail?: string | null; /** * The media type for the image. */ media_type?: string | null; /** * The source type for the image. */ type?: 'letta'; } } /** * Letta's internal representation of a message. Includes methods to convert to/from LLM provider formats. * * Attributes: * id (str): The unique identifier of the message. * role (MessageRole): The role of the participant. * text (str): The text of the message. * user_id (str): The unique identifier of the user. * agent_id (str): The unique identifier of the agent. * model (str): The model used to make the function call. * name (str): The name of the participant. * created_at (datetime): The time the message was created. * tool_calls (List[OpenAIToolCall,]): The list of tool calls requested. * tool_call_id (str): The id of the tool call. * step_id (str): The id of the step that this message was created in. * otid (str): The offline threading id associated with this message. * tool_returns (List[ToolReturn]): The list of tool returns requested. * group_id (str): The multi-agent group that the message was sent in. * sender_id (str): The id of the sender of the message, can be an identity id or agent id. * conversation_id (str): The conversation this message belongs to. 
*/ export interface InternalMessage { /** * The human-friendly ID of the Message */ id: string; /** * The role of the participant. */ role: MessageRole; /** * The unique identifier of the agent. */ agent_id?: string | null; /** * The id of the approval request if this message is associated with a tool call * request. */ approval_request_id?: string | null; /** * The list of approvals for this message. */ approvals?: Array | null; /** * Whether tool call is approved. */ approve?: boolean | null; /** * The id of the LLMBatchItem that this message is associated with */ batch_item_id?: string | null; /** * The content of the message. */ content?: Array< | TextContent | ImageContent | ToolCallContent | ToolReturnContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent | InternalMessage.SummarizedReasoningContent > | null; /** * The conversation this message belongs to */ conversation_id?: string | null; /** * The timestamp when the object was created. */ created_at?: string; /** * The id of the user that made this object. */ created_by_id?: string | null; /** * The reason the tool call request was denied. */ denial_reason?: string | null; /** * The multi-agent group that the message was sent in */ group_id?: string | null; /** * Whether this message is part of an error step. Used only for debugging purposes. */ is_err?: boolean | null; /** * The id of the user that last updated this object. NOTE(review): the previous * text repeated the `created_by_id` description; confirm against the API spec. */ last_updated_by_id?: string | null; /** * The model used to make the function call. */ model?: string | null; /** * For role user/assistant: the (optional) name of the participant. For role * tool/function: the name of the function called. */ name?: string | null; /** * The offline threading id associated with this message */ otid?: string | null; /** * The id of the run that this message was created in. 
*/ run_id?: string | null; /** * The id of the sender of the message, can be an identity id or agent id */ sender_id?: string | null; /** * The id of the step that this message was created in. */ step_id?: string | null; /** * The ID of the tool call. Only applicable for role tool. */ tool_call_id?: string | null; /** * The list of tool calls requested. Only applicable for role assistant. */ tool_calls?: Array | null; /** * Tool execution return information for prior tool calls */ tool_returns?: Array | null; /** * The timestamp when the object was last updated. */ updated_at?: string | null; } export namespace InternalMessage { export interface LettaSchemasMessageToolReturnOutput { /** * The status of the tool call */ status: 'success' | 'error'; /** * The function response - either a string or list of content parts (text/image) */ func_response?: string | Array | null; /** * Captured stderr from the tool invocation */ stderr?: Array | null; /** * Captured stdout (e.g. prints, logs) from the tool invocation */ stdout?: Array | null; /** * The ID for the tool call */ tool_call_id?: unknown; } /** * The style of reasoning content returned by the OpenAI Responses API */ export interface SummarizedReasoningContent { /** * The unique identifier for this reasoning step. */ id: string; /** * Summaries of the reasoning content. */ summary: Array; /** * The encrypted reasoning content. */ encrypted_content?: string; /** * Indicates this is a summarized reasoning step. */ type?: 'summarized_reasoning'; } export namespace SummarizedReasoningContent { export interface Summary { /** * The index of the summary part. */ index: number; /** * The text of the summary part. */ text: string; } } /** * A call to a function tool created by the model. */ export interface ToolCall { id: string; /** * The function that the model called. */ function: ToolCall.Function; type: 'function'; [k: string]: unknown; } export namespace ToolCall { /** * The function that the model called. 
*/ export interface Function { arguments: string; name: string; [k: string]: unknown; } } export interface ToolReturn { /** * The status of the tool call */ status: 'success' | 'error'; /** * The function response - either a string or list of content parts (text/image) */ func_response?: string | Array | null; /** * Captured stderr from the tool invocation */ stderr?: Array | null; /** * Captured stdout (e.g. prints, logs) from the tool invocation */ stdout?: Array | null; /** * The ID for the tool call */ tool_call_id?: unknown; } } /** * Status of the job. */ export type JobStatus = 'created' | 'running' | 'completed' | 'failed' | 'pending' | 'cancelled' | 'expired'; export type JobType = 'job' | 'run' | 'batch'; export interface LettaAssistantMessageContentUnion { /** * The text content of the message. */ text: string; /** * Stores a unique identifier for any reasoning associated with this text content. */ signature?: string | null; /** * The type of the message. */ type?: 'text'; } export interface LettaRequest { /** * @deprecated The name of the message argument in the designated message tool. * Still supported for legacy agent types, but deprecated for letta_v1_agent * onward. */ assistant_message_tool_kwarg?: string; /** * @deprecated The name of the designated message tool. Still supported for legacy * agent types, but deprecated for letta_v1_agent onward. */ assistant_message_tool_name?: string; /** * Client-side skills available in the environment. These are rendered in the * system prompt's available skills section alongside agent-scoped skills from * MemFS. */ client_skills?: Array | null; /** * Client-side tools that the agent can call. When the agent calls a client-side * tool, execution pauses and returns control to the client to execute the tool and * provide the result via a ToolReturn. */ client_tools?: Array | null; /** * @deprecated If set to True, enables reasoning before responses or tool calls * from the agent. 
*/ enable_thinking?: string; /** * If True, compaction events emit structured `SummaryMessage` and `EventMessage` * types. If False (default), compaction messages are not included in the response. */ include_compaction_messages?: boolean; /** * Only return specified message types in the response. If `None` (default) returns * all messages. */ include_return_message_types?: Array | null; /** * Syntactic sugar for a single user message. Equivalent to messages=[{'role': * 'user', 'content': input}]. */ input?: | string | Array< | TextContent | ImageContent | ToolCallContent | ToolReturnContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent | LettaRequest.SummarizedReasoningContent > | null; /** * Maximum number of steps the agent should take to process the request. */ max_steps?: number; /** * The messages to be sent to the agent. */ messages?: Array | null; /** * Model handle to use for this request instead of the agent's default model. This * allows sending a message to a different model without changing the agent's * configuration. */ override_model?: string | null; /** * Optional per-request system prompt override. When set, this is passed directly * to the underlying LLM request and bypasses the persisted/compiled system message * for that request. */ override_system?: string | null; /** * If True, returns log probabilities of the output tokens in the response. Useful * for RL training. Only supported for OpenAI-compatible providers (including * SGLang). */ return_logprobs?: boolean; /** * If True, returns token IDs and logprobs for ALL LLM generations in the agent * step, not just the last one. Uses SGLang native /generate endpoint. Returns * 'turns' field with TurnTokenData for each assistant/tool turn. Required for * proper multi-turn RL training with loss masking. */ return_token_ids?: boolean; /** * Number of most likely tokens to return at each position (0-20). Requires * return_logprobs=True. 
*/ top_logprobs?: number | null; /** * @deprecated Whether the server should parse specific tool call arguments * (default `send_message`) as `AssistantMessage` objects. Still supported for * legacy agent types, but deprecated for letta_v1_agent onward. */ use_assistant_message?: boolean; } export namespace LettaRequest { /** * Schema for a client-side skill passed in the request. * * Client-side skills represent environment-provided capabilities (e.g. * project-scoped skills) that are not stored in the agent's MemFS but should * appear in the system prompt's available skills section. */ export interface ClientSkill { /** * Description of what the skill does */ description: string; /** * Path or location hint for the skill (e.g. skills/my-skill/SKILL.md) */ location: string; /** * The name of the skill */ name: string; } /** * Schema for a client-side tool passed in the request. * * Client-side tools are executed by the client, not the server. When the agent * calls a client-side tool, execution pauses and returns control to the client to * execute the tool and provide the result. */ export interface ClientTool { /** * The name of the tool function */ name: string; /** * Description of what the tool does */ description?: string | null; /** * JSON Schema for the function parameters */ parameters?: { [key: string]: unknown } | null; } /** * The style of reasoning content returned by the OpenAI Responses API */ export interface SummarizedReasoningContent { /** * The unique identifier for this reasoning step. */ id: string; /** * Summaries of the reasoning content. */ summary: Array; /** * The encrypted reasoning content. */ encrypted_content?: string; /** * Indicates this is a summarized reasoning step. */ type?: 'summarized_reasoning'; } export namespace SummarizedReasoningContent { export interface Summary { /** * The index of the summary part. */ index: number; /** * The text of the summary part. 
*/ text: string; } } /** * Submit tool return(s) from client-side tool execution. * * This is the preferred way to send tool results back to the agent after * client-side tool execution. It is equivalent to sending an ApprovalCreate with * tool return approvals, but provides a cleaner API for the common case. */ export interface ToolReturnCreate { /** * List of tool returns from client-side execution */ tool_returns: Array; /** * The multi-agent group that the message was sent in */ group_id?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; /** * The message type to be created. */ type?: 'tool_return'; } } /** * Response object from an agent interaction, consisting of the new messages * generated by the agent and usage statistics. The type of the returned messages * can be either `Message` or `LettaMessage`, depending on what was specified in * the request. * * Attributes: messages (List[Union[Message, LettaMessage]]): The messages returned * by the agent. usage (LettaUsageStatistics): The usage statistics */ export interface LettaResponse { /** * The messages returned by the agent. */ messages: Array; /** * The stop reason from Letta indicating why agent loop stopped execution. */ stop_reason: LettaResponse.StopReason; /** * The usage statistics of the agent. */ usage: LettaResponse.Usage; /** * Log probabilities of the output tokens from the last LLM call. Only present if * return_logprobs was enabled. */ logprobs?: LettaResponse.Logprobs | null; /** * Token data for all LLM generations in multi-turn agent interaction. Includes * token IDs and logprobs for each assistant turn, plus tool result content. Only * present if return_token_ids was enabled. Used for RL training with loss masking. 
*/ turns?: Array | null; } export namespace LettaResponse { /** * The stop reason from Letta indicating why agent loop stopped execution. */ export interface StopReason { /** * The reason why execution stopped. */ stop_reason: RunsAPI.StopReasonType; /** * The type of the message. */ message_type?: 'stop_reason'; } /** * The usage statistics of the agent. */ export interface Usage { /** * The number of input tokens written to cache (Anthropic only). None if not * reported by provider. */ cache_write_tokens?: number | null; /** * The number of input tokens served from cache. None if not reported by provider. */ cached_input_tokens?: number | null; /** * The number of tokens generated by the agent. */ completion_tokens?: number; /** * Estimate of tokens currently in the context window. */ context_tokens?: number | null; message_type?: 'usage_statistics'; /** * The number of tokens in the prompt. */ prompt_tokens?: number; /** * The number of reasoning/thinking tokens generated. None if not reported by * provider. */ reasoning_tokens?: number | null; /** * The background task run IDs associated with the agent interaction */ run_ids?: Array | null; /** * The number of steps taken by the agent. */ step_count?: number; /** * The total number of tokens processed by the agent. */ total_tokens?: number; } /** * Log probabilities of the output tokens from the last LLM call. Only present if * return_logprobs was enabled. 
*/ export interface Logprobs { content?: Array | null; refusal?: Array | null; } export namespace Logprobs { export interface Content { token: string; logprob: number; top_logprobs: Array; bytes?: Array | null; } export namespace Content { export interface TopLogprob { token: string; logprob: number; bytes?: Array | null; } } export interface Refusal { token: string; logprob: number; top_logprobs: Array; bytes?: Array | null; } export namespace Refusal { export interface TopLogprob { token: string; logprob: number; bytes?: Array | null; } } } /** * Token data for a single LLM generation turn in a multi-turn agent interaction. * * Used for RL training to track token IDs and logprobs across all LLM calls, not * just the final one. Tool results are included so the client can tokenize them * with loss_mask=0 (non-trainable). */ export interface Turn { /** * Role of this turn: 'assistant' for LLM generations (trainable), 'tool' for tool * results (non-trainable). */ role: 'assistant' | 'tool'; /** * Text content. For tool turns, client tokenizes this with loss_mask=0. */ content?: string | null; /** * Token IDs from SGLang native endpoint. Only present for assistant turns. */ output_ids?: Array | null; /** * Logprobs from SGLang: [[logprob, token_id, top_logprob_or_null], ...]. Only * present for assistant turns. */ output_token_logprobs?: Array> | null; /** * Name of the tool called. Only present for tool turns. */ tool_name?: string | null; } } export interface LettaStreamingRequest { /** * @deprecated The name of the message argument in the designated message tool. * Still supported for legacy agent types, but deprecated for letta_v1_agent * onward. */ assistant_message_tool_kwarg?: string; /** * @deprecated The name of the designated message tool. Still supported for legacy * agent types, but deprecated for letta_v1_agent onward. */ assistant_message_tool_name?: string; /** * Whether to process the request in the background (only used when * streaming=true). 
*/ background?: boolean; /** * Client-side skills available in the environment. These are rendered in the * system prompt's available skills section alongside agent-scoped skills from * MemFS. */ client_skills?: Array | null; /** * Client-side tools that the agent can call. When the agent calls a client-side * tool, execution pauses and returns control to the client to execute the tool and * provide the result via a ToolReturn. */ client_tools?: Array | null; /** * @deprecated If set to True, enables reasoning before responses or tool calls * from the agent. */ enable_thinking?: string; /** * If True, compaction events emit structured `SummaryMessage` and `EventMessage` * types. If False (default), compaction messages are not included in the response. */ include_compaction_messages?: boolean; /** * Whether to include periodic keepalive ping messages in the stream to prevent * connection timeouts (only used when streaming=true). */ include_pings?: boolean; /** * Only return specified message types in the response. If `None` (default) returns * all messages. */ include_return_message_types?: Array | null; /** * Syntactic sugar for a single user message. Equivalent to messages=[{'role': * 'user', 'content': input}]. */ input?: | string | Array< | TextContent | ImageContent | ToolCallContent | ToolReturnContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent | LettaStreamingRequest.SummarizedReasoningContent > | null; /** * Maximum number of steps the agent should take to process the request. */ max_steps?: number; /** * The messages to be sent to the agent. */ messages?: Array | null; /** * Model handle to use for this request instead of the agent's default model. This * allows sending a message to a different model without changing the agent's * configuration. */ override_model?: string | null; /** * Optional per-request system prompt override. 
When set, this is passed directly * to the underlying LLM request and bypasses the persisted/compiled system message * for that request. */ override_system?: string | null; /** * If True, returns log probabilities of the output tokens in the response. Useful * for RL training. Only supported for OpenAI-compatible providers (including * SGLang). */ return_logprobs?: boolean; /** * If True, returns token IDs and logprobs for ALL LLM generations in the agent * step, not just the last one. Uses SGLang native /generate endpoint. Returns * 'turns' field with TurnTokenData for each assistant/tool turn. Required for * proper multi-turn RL training with loss masking. */ return_token_ids?: boolean; /** * Flag to determine if individual tokens should be streamed, rather than streaming * per step (only used when streaming=true). */ stream_tokens?: boolean; /** * If True, returns a streaming response (Server-Sent Events). If False (default), * returns a complete response. */ streaming?: boolean; /** * Number of most likely tokens to return at each position (0-20). Requires * return_logprobs=True. */ top_logprobs?: number | null; /** * @deprecated Whether the server should parse specific tool call arguments * (default `send_message`) as `AssistantMessage` objects. Still supported for * legacy agent types, but deprecated for letta_v1_agent onward. */ use_assistant_message?: boolean; } export namespace LettaStreamingRequest { /** * Schema for a client-side skill passed in the request. * * Client-side skills represent environment-provided capabilities (e.g. * project-scoped skills) that are not stored in the agent's MemFS but should * appear in the system prompt's available skills section. */ export interface ClientSkill { /** * Description of what the skill does */ description: string; /** * Path or location hint for the skill (e.g. 
skills/my-skill/SKILL.md) */ location: string; /** * The name of the skill */ name: string; } /** * Schema for a client-side tool passed in the request. * * Client-side tools are executed by the client, not the server. When the agent * calls a client-side tool, execution pauses and returns control to the client to * execute the tool and provide the result. */ export interface ClientTool { /** * The name of the tool function */ name: string; /** * Description of what the tool does */ description?: string | null; /** * JSON Schema for the function parameters */ parameters?: { [key: string]: unknown } | null; } /** * The style of reasoning content returned by the OpenAI Responses API */ export interface SummarizedReasoningContent { /** * The unique identifier for this reasoning step. */ id: string; /** * Summaries of the reasoning content. * * NOTE(review): the element type argument is missing here (bare `Array`); * presumably `SummarizedReasoningContent.Summary` - confirm against the * generator output. */ summary: Array; /** * The encrypted reasoning content. */ encrypted_content?: string; /** * Indicates this is a summarized reasoning step. */ type?: 'summarized_reasoning'; } /** * Nested types of `SummarizedReasoningContent`. */ export namespace SummarizedReasoningContent { /** * One part of a reasoning summary: its position (`index`) and its `text`. */ export interface Summary { /** * The index of the summary part. */ index: number; /** * The text of the summary part. */ text: string; } } /** * Submit tool return(s) from client-side tool execution. * * This is the preferred way to send tool results back to the agent after * client-side tool execution. It is equivalent to sending an ApprovalCreate with * tool return approvals, but provides a cleaner API for the common case. */ export interface ToolReturnCreate { /** * List of tool returns from client-side execution */ tool_returns: Array; /** * The multi-agent group that the message was sent in */ group_id?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs.
*/ otid?: string | null; /** * The message type to be created. */ type?: 'tool_return'; } } /** * Streaming response type for Server-Sent Events (SSE) endpoints. Each event in * the stream will be one of these types. */ export type LettaStreamingResponse = | SystemMessage | UserMessage | ReasoningMessage | HiddenReasoningMessage | ToolCallMessage | ToolsAPI.ToolReturnMessage | AssistantMessage | ApprovalRequestMessage | ApprovalResponseMessage | LettaStreamingResponse.LettaPing | LettaStreamingResponse.LettaErrorMessage | LettaStreamingResponse.LettaStopReason | LettaStreamingResponse.LettaUsageStatistics; /** * Event payloads declared specifically for the `LettaStreamingResponse` union * (ping, error, stop reason, usage statistics), in addition to the regular * message types it also includes. */ export namespace LettaStreamingResponse { /** * A ping message used as a keepalive to prevent SSE streams from timing out during * long running requests. * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format */ export interface LettaPing { /** * The ID of the message. */ id: string; /** * The date the message was created, in ISO format. */ date: string; is_err?: boolean | null; /** * The type of the message. Ping messages are a keep-alive to prevent SSE streams * from timing out during long running requests. */ message_type?: 'ping'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } /** * Error messages are used to notify the client of an error that occurred during * the agent's execution. */ export interface LettaErrorMessage { /** * The type of error. */ error_type: string; /** * The error message. */ message: string; /** * The type of the message. */ message_type: 'error_message'; /** * The ID of the run. */ run_id: string; /** * An optional error detail. */ detail?: string; /** * The sequence ID for cursor-based pagination.
*/ seq_id?: number; } /** * The stop reason from Letta indicating why the agent loop stopped execution. */ export interface LettaStopReason { /** * The reason why execution stopped. */ stop_reason: RunsAPI.StopReasonType; /** * The type of the message. */ message_type?: 'stop_reason'; } /** * Usage statistics for the agent interaction. * * Attributes: completion_tokens (int): The number of tokens generated by the * agent. prompt_tokens (int): The number of tokens in the prompt. total_tokens * (int): The total number of tokens processed by the agent. step_count (int): The * number of steps taken by the agent. cached_input_tokens (Optional[int]): The * number of input tokens served from cache. None if not reported. * cache_write_tokens (Optional[int]): The number of input tokens written to cache. * None if not reported. reasoning_tokens (Optional[int]): The number of * reasoning/thinking tokens generated. None if not reported. */ export interface LettaUsageStatistics { /** * The number of input tokens written to cache (Anthropic only). None if not * reported by provider. */ cache_write_tokens?: number | null; /** * The number of input tokens served from cache. None if not reported by provider. */ cached_input_tokens?: number | null; /** * The number of tokens generated by the agent. */ completion_tokens?: number; /** * Estimate of tokens currently in the context window. */ context_tokens?: number | null; /** * The type of the message; the only allowed value is `'usage_statistics'`. */ message_type?: 'usage_statistics'; /** * The number of tokens in the prompt. */ prompt_tokens?: number; /** * The number of reasoning/thinking tokens generated. None if not reported by * provider. */ reasoning_tokens?: number | null; /** * The background task run IDs associated with the agent interaction */ run_ids?: Array | null; /** * The number of steps taken by the agent. */ step_count?: number; /** * The total number of tokens processed by the agent.
*/ total_tokens?: number; } } /** * Content part kinds allowed in user message content: text or image. */ export type LettaUserMessageContentUnion = TextContent | ImageContent; /** * Union of the message variants declared for this resource: system, user, * reasoning, hidden-reasoning, tool-call, tool-return, assistant, * approval-request, approval-response, summary and event messages. * * NOTE(review): the generated description for this alias repeated the * `SystemMessage` text, which describes only one member of the union. */ export type Message = | SystemMessage | UserMessage | ReasoningMessage | HiddenReasoningMessage | ToolCallMessage | ToolsAPI.ToolReturnMessage | AssistantMessage | ApprovalRequestMessage | ApprovalResponseMessage | SummaryMessage | EventMessage; /** * String literals naming the role a message can have. */ export type MessageRole = 'assistant' | 'user' | 'tool' | 'function' | 'system' | 'approval' | 'summary'; /** * String literals naming each message variant; they match the `message_type` * tags used by the message interfaces in this file (e.g. `'system_message'`, * `'user_message'`). */ export type MessageType = | 'system_message' | 'user_message' | 'assistant_message' | 'reasoning_message' | 'hidden_reasoning_message' | 'tool_call_message' | 'tool_return_message' | 'approval_request_message' | 'approval_response_message' | 'summary_message' | 'event_message'; /** * A placeholder for reasoning content we know is present, but isn't returned by * the provider (e.g. OpenAI GPT-5 on ChatCompletions) */ export interface OmittedReasoningContent { /** * A unique identifier for this reasoning step. */ signature?: string | null; /** * Indicates this is an omitted reasoning step. */ type?: 'omitted_reasoning'; } /** * Sent via the Anthropic Messages API */ export interface ReasoningContent { /** * Whether the reasoning content was generated by a reasoner model that processed * this step. */ is_native: boolean; /** * The intermediate reasoning or thought process content. */ reasoning: string; /** * A unique identifier for this reasoning step. */ signature?: string | null; /** * Indicates this is a reasoning/intermediate step. */ type?: 'reasoning'; } /** * Representation of an agent's internal reasoning.
* * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message source (Literal["reasoner_model", "non_reasoner_model"]): Whether the * reasoning content was generated natively by a reasoner model or derived via * prompting reasoning (str): The internal reasoning of the agent signature * (Optional[str]): The model-generated signature of the reasoning step */ export interface ReasoningMessage { /** * The ID of the message. */ id: string; /** * The date the message was created, in ISO format. */ date: string; /** * The internal reasoning of the agent. */ reasoning: string; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'reasoning_message'; /** * The name of the sender of the message. */ name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; /** * The model-generated signature of the reasoning step. */ signature?: string | null; /** * Whether the reasoning content was generated natively by a reasoner model or * derived via prompting. */ source?: 'reasoner_model' | 'non_reasoner_model'; step_id?: string | null; } /** * Sent via the Anthropic Messages API */ export interface RedactedReasoningContent { /** * The redacted or filtered intermediate reasoning content. */ data: string; /** * Indicates this is a redacted thinking step. */ type?: 'redacted_reasoning'; } /** * Representation of a run - a conversation or processing session for an agent. * Runs track when agents process messages and maintain the relationship between * agents, steps, and messages. */ export interface Run { /** * The human-friendly ID of the Run */ id: string; /** * The unique identifier of the agent associated with the run. */ agent_id: string; /** * Whether the run was created in background mode. */ background?: boolean | null; /** * The base template ID that the run belongs to. */ base_template_id?: string | null; /** * Optional error message from attempting to POST the callback endpoint.
*/ callback_error?: string | null; /** * Timestamp when the callback was last attempted. */ callback_sent_at?: string | null; /** * HTTP status code returned by the callback endpoint. */ callback_status_code?: number | null; /** * If set, POST to this URL when the run completes. */ callback_url?: string | null; /** * The timestamp when the run was completed. */ completed_at?: string | null; /** * The unique identifier of the conversation associated with the run. */ conversation_id?: string | null; /** * The timestamp when the run was created. */ created_at?: string; /** * Additional metadata for the run. */ metadata?: { [key: string]: unknown } | null; /** * The request configuration for the run. */ request_config?: Run.RequestConfig | null; /** * The current status of the run. */ status?: 'created' | 'running' | 'completed' | 'failed' | 'cancelled'; /** * The reason why the run was stopped. */ stop_reason?: RunsAPI.StopReasonType | null; /** * Total run duration in nanoseconds */ total_duration_ns?: number | null; /** * Time to first token for a run in nanoseconds */ ttft_ns?: number | null; } /** * Nested types referenced by the `Run` interface (`Run.RequestConfig`). */ export namespace Run { /** * The request configuration for the run. */ export interface RequestConfig { /** * The name of the message argument in the designated message tool. */ assistant_message_tool_kwarg?: string; /** * The name of the designated message tool. */ assistant_message_tool_name?: string; /** * Only return specified message types in the response. If `None` (default) returns * all messages. */ include_return_message_types?: Array | null; /** * Whether the server should parse specific tool call arguments (default * `send_message`) as `AssistantMessage` objects. */ use_assistant_message?: boolean; } } /** * A message representing a summary of the conversation. Sent to the LLM as a user * or system message depending on the provider. */ export interface SummaryMessage { id: string; date: string; /** * The summary of the conversation carried by this message. */ summary: string; /** * Statistics about a memory compaction operation.
*/ compaction_stats?: SummaryMessage.CompactionStats | null; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'summary_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } /** * Nested types referenced by the `SummaryMessage` interface * (`SummaryMessage.CompactionStats`). */ export namespace SummaryMessage { /** * Statistics about a memory compaction operation. */ export interface CompactionStats { /** * The model's context window size */ context_window: number; /** * Number of messages after compaction */ messages_count_after: number; /** * Number of messages before compaction */ messages_count_before: number; /** * What triggered the compaction (e.g., 'context_window_exceeded', * 'post_step_context_check') */ trigger: string; /** * Token count after compaction (message tokens only, does not include tool * definitions) */ context_tokens_after?: number | null; /** * Token count before compaction (from LLM usage stats, includes full context sent * to LLM) */ context_tokens_before?: number | null; } } /** * A message generated by the system. Never streamed back on a response, only used * for cursor pagination. * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message content (str): The message content sent by the system */ export interface SystemMessage { /** * The ID of the message. */ id: string; /** * The message content sent by the system */ content: string; /** * The date the message was created, in ISO format. */ date: string; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'system_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests.
Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } /** * A text content part, tagged with `type: 'text'`. */ export interface TextContent { /** * The text content of the message. */ text: string; /** * Stores a unique identifier for any reasoning associated with this text content. */ signature?: string | null; /** * The type of the message. */ type?: 'text'; } /** * A tool call: the tool `name`, its `arguments` as a string, and the * `tool_call_id`. * * NOTE(review): `arguments` is presumably a JSON-encoded string - confirm. */ export interface ToolCall { arguments: string; name: string; tool_call_id: string; } /** * A content part describing a tool call, tagged with `type: 'tool_call'`. */ export interface ToolCallContent { /** * A unique identifier for this specific tool call instance. */ id: string; /** * The parameters being passed to the tool, structured as a dictionary of parameter * names to values. */ input: { [key: string]: unknown }; /** * The name of the tool being called. */ name: string; /** * Stores a unique identifier for any reasoning associated with this tool call. */ signature?: string | null; /** * Indicates this content represents a tool call event. */ type?: 'tool_call'; } /** * Same fields as `ToolCall`, but each one is optional and nullable. * * NOTE(review): presumably a partial tool call emitted while streaming - confirm. */ export interface ToolCallDelta { arguments?: string | null; name?: string | null; tool_call_id?: string | null; } /** * A message representing a request to call a tool (generated by the LLM to trigger * tool execution). * * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message tool_call (Union[ToolCall, ToolCallDelta]): The tool call */ export interface ToolCallMessage { /** * The ID of the message. */ id: string; /** * The date the message was created, in ISO format. */ date: string; /** * @deprecated The tool call. NOTE(review): no replacement is named by the * generator; this interface also declares `tool_calls` - confirm that it is * the intended successor. */ tool_call: ToolCall | ToolCallDelta; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'tool_call_message'; name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests.
Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; tool_calls?: Array | ToolCallDelta | null; } /** * The outcome of one tool call, identified by `tool_call_id` and marked * `'success'` or `'error'` in `status`. */ export interface ToolReturn { status: 'success' | 'error'; tool_call_id: string; /** * The tool return value - either a string or list of content parts (text/image) */ tool_return: Array | string; stderr?: Array | null; stdout?: Array | null; /** * The message type to be created. */ type?: 'tool'; } /** * A content part describing a tool return, tagged with `type: 'tool_return'`. */ export interface ToolReturnContent { /** * The content returned by the tool execution. */ content: string; /** * Indicates whether the tool execution resulted in an error. */ is_error: boolean; /** * References the ID of the ToolCallContent that initiated this tool call. */ tool_call_id: string; /** * Indicates this content represents a tool return event. */ type?: 'tool_return'; } /** * New content for an assistant message (`message_type: 'assistant_message'`). * * NOTE(review): presumably the body of a message-update request - confirm. */ export interface UpdateAssistantMessage { /** * The message content sent by the assistant (can be a string or an array of * content parts) */ content: Array | string; message_type?: 'assistant_message'; } /** * New reasoning text for a reasoning message * (`message_type: 'reasoning_message'`). */ export interface UpdateReasoningMessage { reasoning: string; message_type?: 'reasoning_message'; } /** * New content for a system message (`message_type: 'system_message'`). */ export interface UpdateSystemMessage { /** * The message content sent by the system (can be a string or an array of * multi-modal content parts) * * NOTE(review): the declared type is `string` only, although this description * mentions an array form - confirm against the API spec. */ content: string; message_type?: 'system_message'; } /** * New content for a user message (`message_type: 'user_message'`). */ export interface UpdateUserMessage { /** * The message content sent by the user (can be a string or an array of multi-modal * content parts) */ content: Array | string; message_type?: 'user_message'; } /** * A message sent by the user. Never streamed back on a response, only used for * cursor pagination.
* * Args: id (str): The ID of the message date (datetime): The date the message was * created in ISO format name (Optional[str]): The name of the sender of the * message content (Union[str, List[LettaUserMessageContentUnion]]): The message * content sent by the user (can be a string or an array of multi-modal content * parts) */ export interface UserMessage { /** * The ID of the message. */ id: string; /** * The message content sent by the user (can be a string or an array of multi-modal * content parts) */ content: Array | string; /** * The date the message was created, in ISO format. */ date: string; is_err?: boolean | null; /** * The type of the message. */ message_type?: 'user_message'; /** * The name of the sender of the message. */ name?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; run_id?: string | null; sender_id?: string | null; seq_id?: number | null; step_id?: string | null; } /** * An object with arbitrary string keys and untyped values. * * NOTE(review): presumably the response body of `Messages.cancel` - confirm. */ export type MessageCancelResponse = { [key: string]: unknown }; /** * Request body accepted by `Messages.create`: either the non-streaming or the * streaming variant. */ export type MessageCreateParams = MessageCreateParamsNonStreaming | MessageCreateParamsStreaming; /** * Fields shared by `MessageCreateParamsNonStreaming` and * `MessageCreateParamsStreaming`, both of which extend this interface. */ export interface MessageCreateParamsBase { /** * @deprecated The name of the message argument in the designated message tool. * Still supported for legacy agent types, but deprecated for letta_v1_agent * onward. */ assistant_message_tool_kwarg?: string; /** * @deprecated The name of the designated message tool. Still supported for legacy * agent types, but deprecated for letta_v1_agent onward. */ assistant_message_tool_name?: string; /** * Whether to process the request in the background (only used when * streaming=true). */ background?: boolean; /** * Client-side skills available in the environment. These are rendered in the * system prompt's available skills section alongside agent-scoped skills from * MemFS. */ client_skills?: Array | null; /** * Client-side tools that the agent can call.
When the agent calls a client-side * tool, execution pauses and returns control to the client to execute the tool and * provide the result via a ToolReturn. */ client_tools?: Array | null; /** * @deprecated If set to True, enables reasoning before responses or tool calls * from the agent. * * NOTE(review): declared as `string` even though the description speaks of * `True` - confirm which string values the server accepts. */ enable_thinking?: string; /** * If True, compaction events emit structured `SummaryMessage` and `EventMessage` * types. If False (default), compaction messages are not included in the response. */ include_compaction_messages?: boolean; /** * Whether to include periodic keepalive ping messages in the stream to prevent * connection timeouts (only used when streaming=true). */ include_pings?: boolean; /** * Only return specified message types in the response. If `None` (default) returns * all messages. */ include_return_message_types?: Array | null; /** * Syntactic sugar for a single user message. Equivalent to messages=[{'role': * 'user', 'content': input}]. Accepts either a plain string or an array of * content parts. */ input?: | string | Array< | TextContent | ImageContent | ToolCallContent | ToolReturnContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent | MessageCreateParams.SummarizedReasoningContent > | null; /** * Maximum number of steps the agent should take to process the request. */ max_steps?: number; /** * The messages to be sent to the agent. */ messages?: Array | null; /** * Model handle to use for this request instead of the agent's default model. This * allows sending a message to a different model without changing the agent's * configuration. */ override_model?: string | null; /** * Optional per-request system prompt override. When set, this is passed directly * to the underlying LLM request and bypasses the persisted/compiled system message * for that request. */ override_system?: string | null; /** * If True, returns log probabilities of the output tokens in the response. Useful * for RL training. Only supported for OpenAI-compatible providers (including * SGLang).
*/ return_logprobs?: boolean; /** * If True, returns token IDs and logprobs for ALL LLM generations in the agent * step, not just the last one. Uses SGLang native /generate endpoint. Returns * 'turns' field with TurnTokenData for each assistant/tool turn. Required for * proper multi-turn RL training with loss masking. */ return_token_ids?: boolean; /** * Flag to determine if individual tokens should be streamed, rather than streaming * per step (only used when streaming=true). */ stream_tokens?: boolean; /** * If True, returns a streaming response (Server-Sent Events). If False (default), * returns a complete response. Narrowed to `false` by * `MessageCreateParamsNonStreaming` and to `true` by * `MessageCreateParamsStreaming`. */ streaming?: boolean; /** * Number of most likely tokens to return at each position (0-20). Requires * return_logprobs=True. */ top_logprobs?: number | null; /** * @deprecated Whether the server should parse specific tool call arguments * (default `send_message`) as `AssistantMessage` objects. Still supported for * legacy agent types, but deprecated for letta_v1_agent onward. */ use_assistant_message?: boolean; } /** * Nested schema types referenced by the message-creation params (client skills, * client tools, summarized reasoning content, tool-return submissions), plus * aliases for the non-streaming and streaming param interfaces. */ export namespace MessageCreateParams { /** * Schema for a client-side skill passed in the request. * * Client-side skills represent environment-provided capabilities (e.g. * project-scoped skills) that are not stored in the agent's MemFS but should * appear in the system prompt's available skills section. */ export interface ClientSkill { /** * Description of what the skill does */ description: string; /** * Path or location hint for the skill (e.g. skills/my-skill/SKILL.md) */ location: string; /** * The name of the skill */ name: string; } /** * Schema for a client-side tool passed in the request. * * Client-side tools are executed by the client, not the server. When the agent * calls a client-side tool, execution pauses and returns control to the client to * execute the tool and provide the result.
*/ export interface ClientTool { /** * The name of the tool function */ name: string; /** * Description of what the tool does */ description?: string | null; /** * JSON Schema for the function parameters */ parameters?: { [key: string]: unknown } | null; } /** * The style of reasoning content returned by the OpenAI Responses API */ export interface SummarizedReasoningContent { /** * The unique identifier for this reasoning step. */ id: string; /** * Summaries of the reasoning content. * * NOTE(review): the element type argument is missing here (bare `Array`); * presumably `SummarizedReasoningContent.Summary` - confirm against the * generator output. */ summary: Array; /** * The encrypted reasoning content. */ encrypted_content?: string; /** * Indicates this is a summarized reasoning step. */ type?: 'summarized_reasoning'; } /** * Nested types of `SummarizedReasoningContent`. */ export namespace SummarizedReasoningContent { /** * One part of a reasoning summary: its position (`index`) and its `text`. */ export interface Summary { /** * The index of the summary part. */ index: number; /** * The text of the summary part. */ text: string; } } /** * Submit tool return(s) from client-side tool execution. * * This is the preferred way to send tool results back to the agent after * client-side tool execution. It is equivalent to sending an ApprovalCreate with * tool return approvals, but provides a cleaner API for the common case. */ export interface ToolReturnCreate { /** * List of tool returns from client-side execution */ tool_returns: Array; /** * The multi-agent group that the message was sent in */ group_id?: string | null; /** * The offline threading id (OTID). Set by the client to deduplicate requests. Used * for idempotency in background streaming mode — each message in a request must * have a unique OTID. Retries of the same request should reuse the same OTIDs. */ otid?: string | null; /** * The message type to be created.
*/ type?: 'tool_return'; } /** * Alias of the top-level `MessageCreateParamsNonStreaming` interface. */ export type MessageCreateParamsNonStreaming = MessagesAPI.MessageCreateParamsNonStreaming; /** * Alias of the top-level `MessageCreateParamsStreaming` interface. */ export type MessageCreateParamsStreaming = MessagesAPI.MessageCreateParamsStreaming; } /** * Message-creation params for a non-streaming request: `streaming` is omitted or * `false`. */ export interface MessageCreateParamsNonStreaming extends MessageCreateParamsBase { /** * If True, returns a streaming response (Server-Sent Events). If False (default), * returns a complete response. */ streaming?: false; } /** * Message-creation params for a streaming request: `streaming` is required and * must be `true`. */ export interface MessageCreateParamsStreaming extends MessageCreateParamsBase { /** * If True, returns a streaming response (Server-Sent Events). If False (default), * returns a complete response. */ streaming: true; } /** * Query parameters accepted by `Messages.list`, in addition to the pagination * parameters inherited from `ArrayPageParams`. */ export interface MessageListParams extends ArrayPageParams { /** * @deprecated The name of the message argument. */ assistant_message_tool_kwarg?: string; /** * @deprecated The name of the designated message tool. */ assistant_message_tool_name?: string; /** * Conversation ID to filter messages by. */ conversation_id?: string | null; /** * Group ID to filter messages by. */ group_id?: string | null; /** * Whether to include error messages and error statuses. For debugging purposes * only. */ include_err?: boolean | null; /** * Message types to include in response. When null, all message types are returned. */ include_return_message_types?: Array | null; /** * @deprecated Whether to use assistant messages */ use_assistant_message?: boolean; } /** * Request body accepted by `Messages.cancel`. */ export interface MessageCancelParams { /** * Optional list of run IDs to cancel */ run_ids?: Array | null; } /** * Parameters carrying optional compaction settings. * * NOTE(review): presumably the request body of the compact/summarize endpoint - * confirm against the method that consumes it. */ export interface MessageCompactParams { /** * Configuration for conversation compaction / summarization. * * Per-model settings (temperature, max tokens, etc.) are derived from the default * configuration for that handle. */ compaction_settings?: MessageCompactParams.CompactionSettings | null; } /** * Nested types referenced by `MessageCompactParams`. */ export namespace MessageCompactParams { /** * Configuration for conversation compaction / summarization. * * Per-model settings (temperature, max tokens, etc.)
are derived from the default * configuration for that handle. */ export interface CompactionSettings { /** * The maximum length of the summary in characters. If none, no clipping is * performed. */ clip_chars?: number | null; /** * The type of summarization technique to use. */ mode?: 'all' | 'sliding_window' | 'self_compact_all' | 'self_compact_sliding_window'; /** * Model handle to use for sliding_window/all summarization (format: * provider/model-name). If None, uses lightweight provider-specific defaults. */ model?: string | null; /** * Optional model settings used to override defaults for the summarizer model. */ model_settings?: | AgentsAPI.OpenAIModelSettings | CompactionSettings.SgLangModelSettings | AgentsAPI.AnthropicModelSettings | AgentsAPI.GoogleAIModelSettings | AgentsAPI.GoogleVertexModelSettings | AgentsAPI.AzureModelSettings | AgentsAPI.XaiModelSettings | CompactionSettings.MoonshotModelSettings | CompactionSettings.ZaiModelSettings | CompactionSettings.MoonshotCodingModelSettings | AgentsAPI.GroqModelSettings | AgentsAPI.DeepseekModelSettings | AgentsAPI.TogetherModelSettings | AgentsAPI.BedrockModelSettings | CompactionSettings.BasetenModelSettings | CompactionSettings.OpenRouterModelSettings | CompactionSettings.ChatGptoAuthModelSettings | null; /** * The prompt to use for summarization. If None, uses mode-specific default. */ prompt?: string | null; /** * Whether to include an acknowledgement post-prompt (helps prevent non-summary * outputs). */ prompt_acknowledgement?: boolean; /** * The percentage of the context window to keep post-summarization (only used in * sliding window modes). */ sliding_window_percentage?: number; } /** * Provider-specific model-settings types that `CompactionSettings.model_settings` * references from this namespace rather than from `AgentsAPI`. */ export namespace CompactionSettings { /** * SGLang model configuration (OpenAI-compatible runtime with SGLang-specific * parsing). */ export interface SgLangModelSettings { /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling.
*/ parallel_tool_calls?: boolean; /** * The type of the provider. */ provider_type?: 'sglang'; /** * The reasoning configuration for the model. */ reasoning?: SgLangModelSettings.Reasoning; /** * The response format for the model. */ response_format?: | AgentsAPI.TextResponseFormat | AgentsAPI.JsonSchemaResponseFormat | AgentsAPI.JsonObjectResponseFormat | null; /** * Enable strict mode for tool calling. When true, tool outputs are guaranteed to * match JSON schemas. */ strict?: boolean; /** * The temperature of the model. */ temperature?: number; /** * SGLang tool call parser name (for example 'glm47', 'qwen25', or 'hermes'). */ tool_call_parser?: string | null; } /** * Nested types referenced by `SgLangModelSettings` * (`SgLangModelSettings.Reasoning`). */ export namespace SgLangModelSettings { /** * The reasoning configuration for the model. */ export interface Reasoning { /** * The reasoning effort to use when generating text with reasoning models. */ reasoning_effort?: 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'; } } /** * Moonshot/Kimi model configuration (OpenAI-compatible). */ export interface MoonshotModelSettings { /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling. */ parallel_tool_calls?: boolean; /** * The type of the provider. */ provider_type?: 'moonshot'; /** * The response format for the model. */ response_format?: | AgentsAPI.TextResponseFormat | AgentsAPI.JsonSchemaResponseFormat | AgentsAPI.JsonObjectResponseFormat | null; /** * Enable strict mode for tool calling. When true, tool outputs are guaranteed to * match JSON schemas. */ strict?: boolean; /** * The temperature of the model. */ temperature?: number; } /** * Z.ai (ZhipuAI) model configuration (OpenAI-compatible). */ export interface ZaiModelSettings { /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling. */ parallel_tool_calls?: boolean; /** * The type of the provider.
*/ provider_type?: 'zai'; /** * The response format for the model. */ response_format?: | AgentsAPI.TextResponseFormat | AgentsAPI.JsonSchemaResponseFormat | AgentsAPI.JsonObjectResponseFormat | null; /** * The temperature of the model. */ temperature?: number; /** * The thinking configuration for GLM-4.5+ models. */ thinking?: ZaiModelSettings.Thinking; } /** * Nested types referenced by `ZaiModelSettings` (`ZaiModelSettings.Thinking`). */ export namespace ZaiModelSettings { /** * The thinking configuration for GLM-4.5+ models. */ export interface Thinking { /** * If False, preserved thinking is used (recommended for agents). */ clear_thinking?: boolean; /** * Whether thinking is enabled or disabled. */ type?: 'enabled' | 'disabled'; } } /** * Kimi Code model configuration (Anthropic-compatible). */ export interface MoonshotCodingModelSettings { /** * Effort level for supported Anthropic models (controls token spending). 'xhigh' * and 'max' are available on Opus 4.6+. Not setting this gives similar performance * to 'high'. */ effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null; /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling. */ parallel_tool_calls?: boolean; /** * The type of the provider. */ provider_type?: 'moonshot_coding'; /** * The response format for the model. */ response_format?: | AgentsAPI.TextResponseFormat | AgentsAPI.JsonSchemaResponseFormat | AgentsAPI.JsonObjectResponseFormat | null; /** * Enable strict mode for tool calling. When true, tool outputs are guaranteed to * match JSON schemas. */ strict?: boolean; /** * The temperature of the model. */ temperature?: number; /** * The thinking configuration for the model. */ thinking?: MoonshotCodingModelSettings.Thinking; /** * Soft control for how verbose model output should be, used for GPT-5 models. * * NOTE(review): this description mentions GPT-5 although the interface is the * Kimi Code configuration - confirm whether the field has any effect here. */ verbosity?: 'low' | 'medium' | 'high' | null; } /** * Nested types referenced by `MoonshotCodingModelSettings` * (`MoonshotCodingModelSettings.Thinking`). */ export namespace MoonshotCodingModelSettings { /** * The thinking configuration for the model.
*/ export interface Thinking { /** * The maximum number of tokens the model can use for extended thinking. */ budget_tokens?: number; /** * The type of thinking to use. */ type?: 'enabled' | 'disabled'; } } /** * Baseten model configuration (OpenAI-compatible). */ export interface BasetenModelSettings { /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling. */ parallel_tool_calls?: boolean; /** * The type of the provider. */ provider_type?: 'baseten'; /** * The temperature of the model. */ temperature?: number; } /** * OpenRouter model configuration (OpenAI-compatible). */ export interface OpenRouterModelSettings { /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling. */ parallel_tool_calls?: boolean; /** * The type of the provider. */ provider_type?: 'openrouter'; /** * The response format for the model. */ response_format?: | AgentsAPI.TextResponseFormat | AgentsAPI.JsonSchemaResponseFormat | AgentsAPI.JsonObjectResponseFormat | null; /** * The temperature of the model. */ temperature?: number; } /** * ChatGPT OAuth model configuration (uses ChatGPT backend API). */ export interface ChatGptoAuthModelSettings { /** * The maximum number of tokens the model can generate. */ max_output_tokens?: number; /** * Whether to enable parallel tool calling. */ parallel_tool_calls?: boolean; /** * The type of the provider. */ provider_type?: 'chatgpt_oauth'; /** * The reasoning configuration for the model. */ reasoning?: ChatGptoAuthModelSettings.Reasoning; /** * The temperature of the model. */ temperature?: number; } export namespace ChatGptoAuthModelSettings { /** * The reasoning configuration for the model. */ export interface Reasoning { /** * The reasoning effort level for GPT-5.x and o-series models. 
*/
        reasoning_effort?: 'none' | 'low' | 'medium' | 'high' | 'xhigh';
      }
    }
  }
}

/**
 * Request body parameters for creating a message asynchronously.
 */
export interface MessageCreateAsyncParams {
  /**
   * @deprecated The name of the message argument in the designated message tool.
   * Still supported for legacy agent types, but deprecated for letta_v1_agent
   * onward.
   */
  assistant_message_tool_kwarg?: string;

  /**
   * @deprecated The name of the designated message tool. Still supported for legacy
   * agent types, but deprecated for letta_v1_agent onward.
   */
  assistant_message_tool_name?: string;

  /**
   * Optional callback URL to POST to when the job completes
   */
  callback_url?: string | null;

  /**
   * Client-side skills available in the environment. These are rendered in the
   * system prompt's available skills section alongside agent-scoped skills from
   * MemFS.
   */
  client_skills?: Array<MessageCreateAsyncParams.ClientSkill> | null;

  /**
   * Client-side tools that the agent can call. When the agent calls a client-side
   * tool, execution pauses and returns control to the client to execute the tool and
   * provide the result via a ToolReturn.
   */
  client_tools?: Array<MessageCreateAsyncParams.ClientTool> | null;

  /**
   * @deprecated If set to True, enables reasoning before responses or tool calls
   * from the agent.
   */
  enable_thinking?: string;

  /**
   * If True, compaction events emit structured `SummaryMessage` and `EventMessage`
   * types. If False (default), compaction messages are not included in the response.
   */
  include_compaction_messages?: boolean;

  // NOTE(review): element type restored as `MessageType` (exported by this file) — confirm against the API spec.
  /**
   * Only return specified message types in the response. If `None` (default) returns
   * all messages.
   */
  include_return_message_types?: Array<MessageType> | null;

  /**
   * Syntactic sugar for a single user message. Equivalent to messages=[{'role':
   * 'user', 'content': input}].
   */
  input?:
    | string
    | Array<
        | TextContent
        | ImageContent
        | ToolCallContent
        | ToolReturnContent
        | ReasoningContent
        | RedactedReasoningContent
        | OmittedReasoningContent
        | MessageCreateAsyncParams.SummarizedReasoningContent
      >
    | null;

  /**
   * Maximum number of steps the agent should take to process the request.
   */
  max_steps?: number;

  /**
   * The messages to be sent to the agent.
   */
  messages?: Array<
    AgentsAPI.MessageCreate | ApprovalCreate | MessageCreateAsyncParams.ToolReturnCreate
  > | null;

  /**
   * Model handle to use for this request instead of the agent's default model. This
   * allows sending a message to a different model without changing the agent's
   * configuration.
   */
  override_model?: string | null;

  /**
   * Optional per-request system prompt override. When set, this is passed directly
   * to the underlying LLM request and bypasses the persisted/compiled system message
   * for that request.
   */
  override_system?: string | null;

  /**
   * If True, returns log probabilities of the output tokens in the response. Useful
   * for RL training. Only supported for OpenAI-compatible providers (including
   * SGLang).
   */
  return_logprobs?: boolean;

  /**
   * If True, returns token IDs and logprobs for ALL LLM generations in the agent
   * step, not just the last one. Uses SGLang native /generate endpoint. Returns
   * 'turns' field with TurnTokenData for each assistant/tool turn. Required for
   * proper multi-turn RL training with loss masking.
   */
  return_token_ids?: boolean;

  /**
   * Number of most likely tokens to return at each position (0-20). Requires
   * return_logprobs=True.
   */
  top_logprobs?: number | null;

  /**
   * @deprecated Whether the server should parse specific tool call arguments
   * (default `send_message`) as `AssistantMessage` objects. Still supported for
   * legacy agent types, but deprecated for letta_v1_agent onward.
   */
  use_assistant_message?: boolean;
}

export namespace MessageCreateAsyncParams {
  /**
   * Schema for a client-side skill passed in the request.
   *
   * Client-side skills represent environment-provided capabilities (e.g.
   * project-scoped skills) that are not stored in the agent's MemFS but should
   * appear in the system prompt's available skills section.
   */
  export interface ClientSkill {
    /**
     * Description of what the skill does
     */
    description: string;

    /**
     * Path or location hint for the skill (e.g. skills/my-skill/SKILL.md)
     */
    location: string;

    /**
     * The name of the skill
     */
    name: string;
  }

  /**
   * Schema for a client-side tool passed in the request.
   *
   * Client-side tools are executed by the client, not the server. When the agent
   * calls a client-side tool, execution pauses and returns control to the client to
   * execute the tool and provide the result.
   */
  export interface ClientTool {
    /**
     * The name of the tool function
     */
    name: string;

    /**
     * Description of what the tool does
     */
    description?: string | null;

    /**
     * JSON Schema for the function parameters
     */
    parameters?: { [key: string]: unknown } | null;
  }

  /**
   * The style of reasoning content returned by the OpenAI Responses API
   */
  export interface SummarizedReasoningContent {
    /**
     * The unique identifier for this reasoning step.
     */
    id: string;

    /**
     * Summaries of the reasoning content.
     */
    summary: Array<SummarizedReasoningContent.Summary>;

    /**
     * The encrypted reasoning content.
     */
    encrypted_content?: string;

    /**
     * Indicates this is a summarized reasoning step.
     */
    type?: 'summarized_reasoning';
  }

  export namespace SummarizedReasoningContent {
    /**
     * One part of a reasoning summary.
     */
    export interface Summary {
      /**
       * The index of the summary part.
       */
      index: number;

      /**
       * The text of the summary part.
       */
      text: string;
    }
  }

  /**
   * Submit tool return(s) from client-side tool execution.
   *
   * This is the preferred way to send tool results back to the agent after
   * client-side tool execution. It is equivalent to sending an ApprovalCreate with
   * tool return approvals, but provides a cleaner API for the common case.
   */
  export interface ToolReturnCreate {
    // NOTE(review): element type restored as the file-level `ToolReturn` — confirm against the API spec.
    /**
     * List of tool returns from client-side execution
     */
    tool_returns: Array<MessagesAPI.ToolReturn>;

    /**
     * The multi-agent group that the message was sent in
     */
    group_id?: string | null;

    /**
     * The offline threading id (OTID). Set by the client to deduplicate requests. Used
     * for idempotency in background streaming mode — each message in a request must
     * have a unique OTID. Retries of the same request should reuse the same OTIDs.
     */
    otid?: string | null;

    /**
     * The message type to be created.
     */
    type?: 'tool_return';
  }
}

/**
 * Request body parameters for resetting an agent's messages.
 */
export interface MessageResetParams {
  /**
   * If true, adds the default initial messages after resetting.
   */
  add_default_initial_messages?: boolean;
}

/**
 * Request body parameters for the message streaming endpoint.
 */
export interface MessageStreamParams {
  /**
   * @deprecated The name of the message argument in the designated message tool.
   * Still supported for legacy agent types, but deprecated for letta_v1_agent
   * onward.
   */
  assistant_message_tool_kwarg?: string;

  /**
   * @deprecated The name of the designated message tool. Still supported for legacy
   * agent types, but deprecated for letta_v1_agent onward.
   */
  assistant_message_tool_name?: string;

  /**
   * Whether to process the request in the background (only used when
   * streaming=true).
   */
  background?: boolean;

  /**
   * Client-side skills available in the environment. These are rendered in the
   * system prompt's available skills section alongside agent-scoped skills from
   * MemFS.
   */
  client_skills?: Array<MessageStreamParams.ClientSkill> | null;

  /**
   * Client-side tools that the agent can call. When the agent calls a client-side
   * tool, execution pauses and returns control to the client to execute the tool and
   * provide the result via a ToolReturn.
   */
  client_tools?: Array<MessageStreamParams.ClientTool> | null;

  /**
   * @deprecated If set to True, enables reasoning before responses or tool calls
   * from the agent.
   */
  enable_thinking?: string;

  /**
   * If True, compaction events emit structured `SummaryMessage` and `EventMessage`
   * types. If False (default), compaction messages are not included in the response.
   */
  include_compaction_messages?: boolean;

  /**
   * Whether to include periodic keepalive ping messages in the stream to prevent
   * connection timeouts (only used when streaming=true).
   */
  include_pings?: boolean;

  // NOTE(review): element type restored as `MessageType` (exported by this file) — confirm against the API spec.
  /**
   * Only return specified message types in the response. If `None` (default) returns
   * all messages.
   */
  include_return_message_types?: Array<MessageType> | null;

  /**
   * Syntactic sugar for a single user message. Equivalent to messages=[{'role':
   * 'user', 'content': input}].
   */
  input?:
    | string
    | Array<
        | TextContent
        | ImageContent
        | ToolCallContent
        | ToolReturnContent
        | ReasoningContent
        | RedactedReasoningContent
        | OmittedReasoningContent
        | MessageStreamParams.SummarizedReasoningContent
      >
    | null;

  /**
   * Maximum number of steps the agent should take to process the request.
   */
  max_steps?: number;

  /**
   * The messages to be sent to the agent.
   */
  messages?: Array<
    AgentsAPI.MessageCreate | ApprovalCreate | MessageStreamParams.ToolReturnCreate
  > | null;

  /**
   * Model handle to use for this request instead of the agent's default model. This
   * allows sending a message to a different model without changing the agent's
   * configuration.
   */
  override_model?: string | null;

  /**
   * Optional per-request system prompt override. When set, this is passed directly
   * to the underlying LLM request and bypasses the persisted/compiled system message
   * for that request.
   */
  override_system?: string | null;

  /**
   * If True, returns log probabilities of the output tokens in the response. Useful
   * for RL training. Only supported for OpenAI-compatible providers (including
   * SGLang).
   */
  return_logprobs?: boolean;

  /**
   * If True, returns token IDs and logprobs for ALL LLM generations in the agent
   * step, not just the last one. Uses SGLang native /generate endpoint. Returns
   * 'turns' field with TurnTokenData for each assistant/tool turn. Required for
   * proper multi-turn RL training with loss masking.
   */
  return_token_ids?: boolean;

  /**
   * Flag to determine if individual tokens should be streamed, rather than streaming
   * per step (only used when streaming=true).
   */
  stream_tokens?: boolean;

  /**
   * If True, returns a streaming response (Server-Sent Events). If False (default),
   * returns a complete response.
   */
  streaming?: boolean;

  /**
   * Number of most likely tokens to return at each position (0-20). Requires
   * return_logprobs=True.
   */
  top_logprobs?: number | null;

  /**
   * @deprecated Whether the server should parse specific tool call arguments
   * (default `send_message`) as `AssistantMessage` objects. Still supported for
   * legacy agent types, but deprecated for letta_v1_agent onward.
   */
  use_assistant_message?: boolean;
}

export namespace MessageStreamParams {
  /**
   * Schema for a client-side skill passed in the request.
   *
   * Client-side skills represent environment-provided capabilities (e.g.
   * project-scoped skills) that are not stored in the agent's MemFS but should
   * appear in the system prompt's available skills section.
   */
  export interface ClientSkill {
    /**
     * Description of what the skill does
     */
    description: string;

    /**
     * Path or location hint for the skill (e.g. skills/my-skill/SKILL.md)
     */
    location: string;

    /**
     * The name of the skill
     */
    name: string;
  }

  /**
   * Schema for a client-side tool passed in the request.
   *
   * Client-side tools are executed by the client, not the server. When the agent
   * calls a client-side tool, execution pauses and returns control to the client to
   * execute the tool and provide the result.
   */
  export interface ClientTool {
    /**
     * The name of the tool function
     */
    name: string;

    /**
     * Description of what the tool does
     */
    description?: string | null;

    /**
     * JSON Schema for the function parameters
     */
    parameters?: { [key: string]: unknown } | null;
  }

  /**
   * The style of reasoning content returned by the OpenAI Responses API
   */
  export interface SummarizedReasoningContent {
    /**
     * The unique identifier for this reasoning step.
     */
    id: string;

    /**
     * Summaries of the reasoning content.
     */
    summary: Array<SummarizedReasoningContent.Summary>;

    /**
     * The encrypted reasoning content.
     */
    encrypted_content?: string;

    /**
     * Indicates this is a summarized reasoning step.
     */
    type?: 'summarized_reasoning';
  }

  export namespace SummarizedReasoningContent {
    /**
     * One part of a reasoning summary.
     */
    export interface Summary {
      /**
       * The index of the summary part.
       */
      index: number;

      /**
       * The text of the summary part.
       */
      text: string;
    }
  }

  /**
   * Submit tool return(s) from client-side tool execution.
   *
   * This is the preferred way to send tool results back to the agent after
   * client-side tool execution. It is equivalent to sending an ApprovalCreate with
   * tool return approvals, but provides a cleaner API for the common case.
   */
  export interface ToolReturnCreate {
    // NOTE(review): element type restored as the file-level `ToolReturn` — confirm against the API spec.
    /**
     * List of tool returns from client-side execution
     */
    tool_returns: Array<MessagesAPI.ToolReturn>;

    /**
     * The multi-agent group that the message was sent in
     */
    group_id?: string | null;

    /**
     * The offline threading id (OTID). Set by the client to deduplicate requests. Used
     * for idempotency in background streaming mode — each message in a request must
     * have a unique OTID. Retries of the same request should reuse the same OTIDs.
     */
    otid?: string | null;

    /**
     * The message type to be created.
     */
    type?: 'tool_return';
  }
}

// Re-exports this file's public types under the `Messages` resource namespace.
export declare namespace Messages {
  export {
    type ApprovalCreate as ApprovalCreate,
    type ApprovalRequestMessage as ApprovalRequestMessage,
    type ApprovalResponseMessage as ApprovalResponseMessage,
    type ApprovalReturn as ApprovalReturn,
    type AssistantMessage as AssistantMessage,
    type EventMessage as EventMessage,
    type HiddenReasoningMessage as HiddenReasoningMessage,
    type ImageContent as ImageContent,
    type InternalMessage as InternalMessage,
    type JobStatus as JobStatus,
    type JobType as JobType,
    type LettaAssistantMessageContentUnion as LettaAssistantMessageContentUnion,
    type LettaRequest as LettaRequest,
    type LettaResponse as LettaResponse,
    type LettaStreamingRequest as LettaStreamingRequest,
    type LettaStreamingResponse as LettaStreamingResponse,
    type LettaUserMessageContentUnion as LettaUserMessageContentUnion,
    type Message as Message,
    type MessageRole as MessageRole,
    type MessageType as MessageType,
    type OmittedReasoningContent as OmittedReasoningContent,
    type ReasoningContent as ReasoningContent,
    type ReasoningMessage as ReasoningMessage,
    type RedactedReasoningContent as RedactedReasoningContent,
    type Run as Run,
    type SummaryMessage as SummaryMessage,
    type SystemMessage as SystemMessage,
    type TextContent as TextContent,
    type ToolCall as ToolCall,
    type ToolCallContent as ToolCallContent,
    type ToolCallDelta as ToolCallDelta,
    type ToolCallMessage as ToolCallMessage,
    type ToolReturn as ToolReturn,
    type ToolReturnContent as ToolReturnContent,
    type UpdateAssistantMessage as UpdateAssistantMessage,
    type UpdateReasoningMessage as UpdateReasoningMessage,
    type UpdateSystemMessage as UpdateSystemMessage,
    type UpdateUserMessage as UpdateUserMessage,
    type UserMessage as UserMessage,
    type MessageCancelResponse as MessageCancelResponse,
    type MessagesArrayPage as MessagesArrayPage,
    type MessageCreateParams as MessageCreateParams,
    type MessageCreateParamsNonStreaming as MessageCreateParamsNonStreaming,
    type MessageCreateParamsStreaming as MessageCreateParamsStreaming,
    type MessageListParams as MessageListParams,
    type MessageCancelParams as MessageCancelParams,
    type MessageCompactParams as MessageCompactParams,
    type MessageCreateAsyncParams as MessageCreateAsyncParams,
    type MessageResetParams as MessageResetParams,
    type MessageStreamParams as MessageStreamParams,
  };
}