import type { OpenAICompat, OpenAIReasoningDisableMode, OpenAIStreamMarkupHealingPattern, OpenRouterRouting, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat, ResolvedOpenAISharedCompat, VercelGatewayRouting } from "@oh-my-pi/pi-catalog/types";
import { type Api, type AssistantMessage, type CacheRetention, type Context, type ImageContent, type Message, type MessageAttribution, type Model, type Provider, type ServiceTier, type StopReason, type StreamOptions, type TextContent, type TextSignatureV1, type ThinkingContent, type Tool, type ToolCall, type ToolResultMessage } from "../types";
import { kStreamingLastParseLen, kStreamingPartialJson } from "../utils/block-symbols";
import type { AssistantMessageEventStream } from "../utils/event-stream";
import type { CapturedHttpErrorResponse } from "../utils/http-inspector";
import type { ChatCompletionCreateParamsStreaming } from "./openai-chat-wire";
import type { InputItem } from "./openai-codex/request-transformer";
import type { ResponseContentPartAddedEvent, ResponseCreateParamsStreaming, ResponseInput, ResponseInputContent, ResponseInputItem, ResponseOutputItem, ResponseOutputMessage, ResponseReasoningItem, ResponseStatus, ResponseStreamEvent } from "./openai-responses-wire";
/**
 * Minimal identification of a model: provider name, model id and an optional
 * base URL. Accepted by helpers in this module that only need to know which
 * model is addressed (e.g. `getOpenAIStrictToolsScope`,
 * `isOpenRouterAnthropicModel`) and extended by `OpenAIRequestSetupModel`.
 */
export interface OpenAIModelIdentity {
    provider: string;
    id: string;
    baseUrl?: string;
}
/**
 * A (provider, base URL, model id) triple identifying where a strict-tools
 * decision applies. Produced by `getOpenAIStrictToolsScope` and consumed by
 * `isStrictToolsDisabledForScope` / `disableStrictToolsForScope`.
 *
 * Unlike `OpenAIModelIdentity.baseUrl`, `baseUrl` is a required key here whose
 * value may be `undefined`.
 */
export interface OpenAIStrictToolsScope {
    provider: string;
    baseUrl: string | undefined;
    modelId: string;
}
/**
 * Mutable state tracking the scopes for which strict tools have been disabled.
 * Created by `createOpenAIStrictToolsState` and reset by
 * `clearOpenAIStrictToolsState`.
 */
export interface OpenAIStrictToolsState {
    strictTools: {
        // NOTE(review): presumably string keys derived from an
        // `OpenAIStrictToolsScope`; the key format is not visible here.
        disabledModelScopes: Set<string>;
    };
}
/**
 * Model slice accepted by `resolveOpenAIRequestSetup`: the model identity plus
 * optional static headers, an optional premium multiplier, and only the
 * `promptCacheSessionHeader` field of the resolved shared compat.
 */
export interface OpenAIRequestSetupModel extends OpenAIModelIdentity {
    headers?: Record<string, string>;
    premiumMultiplier?: number;
    compat?: Pick<ResolvedOpenAISharedCompat, "promptCacheSessionHeader">;
}
/**
 * Cache-related caller options read by the Responses prompt-cache-key and
 * session-id helpers (`getOpenAIResponsesPromptCacheKey`,
 * `getOpenAIResponsesRoutingSessionId`, `getOpenRouterResponsesSessionId`).
 * All fields are optional.
 */
export interface OpenAIResponsesCacheOptions {
    cacheRetention?: CacheRetention;
    sessionId?: string;
    promptCacheKey?: string;
}
/**
 * Per-request inputs to `resolveOpenAIRequestSetup`. `messages` is the only
 * required field; everything else is optional.
 */
export interface OpenAIRequestSetupOptions {
    apiKey?: string;
    extraHeaders?: Record<string, string>;
    initiatorOverride?: MessageAttribution;
    messages: Message[];
    defaultBaseUrl?: string;
    /** Callback producing headers; NOTE(review): presumably merged ahead of the other headers — confirm ordering in the implementation. */
    prependHeaders?: () => Record<string, string>;
    alibabaCodingPlanAuth?: boolean;
    /** Azure Chat Completions addressing: API version plus deployment name. */
    azureChatCompletions?: {
        apiVersion: string;
        deploymentName: string;
    };
    openAISessionId?: string;
    promptCacheSessionId?: string;
}
/**
 * Result of `resolveOpenAIRequestSetup`. Every key is always present; values
 * that could not be resolved are `undefined` rather than omitted.
 *
 * NOTE(review): the distinction between `headers` and `requestHeaders` is not
 * visible in this declaration — confirm against the implementation.
 */
export interface OpenAIRequestSetup {
    copilotPremiumRequests: number | undefined;
    baseUrl: string | undefined;
    headers: Record<string, string>;
    query: Record<string, string> | undefined;
    requestHeaders: Record<string, string>;
}
/**
 * Resolve the per-request setup (base URL, headers, optional query parameters
 * and Copilot premium-request count) for an OpenAI-family request.
 *
 * @param model - Identity, static headers and compat slice of the target model.
 * @param options - Per-request inputs; see {@link OpenAIRequestSetupOptions}.
 * @returns The resolved {@link OpenAIRequestSetup}.
 */
export declare function resolveOpenAIRequestSetup(model: OpenAIRequestSetupModel, options: OpenAIRequestSetupOptions): OpenAIRequestSetup;
/**
 * Apply a service tier to a request-params object. Returns `void`, so any
 * effect is a mutation of `params.service_tier`.
 *
 * NOTE(review): the `provider` argument suggests the tier is only written for
 * certain providers — confirm against the implementation.
 *
 * @param params - Request params carrying an optional `service_tier` field.
 * @param serviceTier - Tier requested by the caller; may be `null`/`undefined`.
 * @param provider - Provider of the target model, if known.
 */
export declare function applyOpenAIServiceTier(params: {
    service_tier?: ServiceTier | null | undefined;
}, serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): void;
/**
 * Adjust resolved cost by the service tier OpenAI actually billed — parity with
 * Codex (`applyCodexServiceTierPricing`), but with the standard (non-Codex)
 * multipliers. The served tier comes from the response echo, falling back to the
 * resolved request tier. Scoped to `provider: "openai"` (the only standard
 * Responses biller) so an echoed `service_tier` from an Azure/OpenRouter/Copilot
 * proxy can never skew those costs.
 *
 * @param model - Only `provider` is read from the model.
 * @param usage - Usage record of the assistant message whose cost is adjusted.
 *   The function returns `void`, so the adjustment is presumably applied in
 *   place — confirm against the implementation.
 * @param responseServiceTier - Raw `service_tier` echo from the response; typed
 *   `unknown` because it is unvalidated wire data.
 * @param requestServiceTier - Tier the request asked for; the fallback when the
 *   response carries no echo.
 */
export declare function applyOpenAIResponsesServiceTierCost(model: Pick<Model, "provider">, usage: AssistantMessage["usage"], responseServiceTier: unknown, requestServiceTier: ServiceTier | null | undefined): void;
/**
 * Raw token counters fed to `calculateOpenAIUsageAccounting`.
 *
 * The two `cacheWrite*` fields are required keys whose value may be
 * `undefined` (provider did not report one).
 * NOTE(review): field names suggest provider-specific sources (OpenRouter,
 * DeepSeek); how they are combined is not visible in this declaration.
 */
export interface OpenAIUsageAccountingInput {
    promptTokens: number;
    outputTokens: number;
    cachedTokens: number;
    reasoningTokens: number;
    cacheWriteOpenRouter: number | undefined;
    cacheWriteDeepSeek: number | undefined;
    hasDeepSeekCacheHitAndMiss: boolean;
}
/**
 * Normalized token accounting returned by `calculateOpenAIUsageAccounting`.
 * `reasoningTokens` is the only optional field.
 */
export interface OpenAIUsageAccounting {
    input: number;
    output: number;
    cacheRead: number;
    cacheWrite: number;
    totalTokens: number;
    reasoningTokens?: number;
}
/**
 * Convert raw provider token counters into the normalized
 * {@link OpenAIUsageAccounting} shape.
 *
 * NOTE(review): the exact arithmetic (e.g. how cached tokens relate to
 * `input`) is not visible in this declaration.
 */
export declare function calculateOpenAIUsageAccounting(accounting: OpenAIUsageAccountingInput): OpenAIUsageAccounting;
/**
 * Normalize an optional session id into a Responses prompt-cache key.
 *
 * @returns A string key, or `undefined`. NOTE(review): the normalization rules
 *   and the conditions yielding `undefined` are not visible here.
 */
export declare function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined;
/**
 * Normalize an optional session id into the form used for OpenRouter Responses
 * requests.
 *
 * @returns A string id, or `undefined`. NOTE(review): the normalization rules
 *   are not visible here.
 */
export declare function normalizeOpenRouterResponsesSessionId(sessionId: string | undefined): string | undefined;
/**
 * Derive the Responses prompt-cache key from the caller's cache options.
 *
 * @param options - May be `undefined`; all of `cacheRetention`, `sessionId`
 *   and `promptCacheKey` are available to the implementation.
 * @returns The key to send, or `undefined`. NOTE(review): precedence between
 *   `promptCacheKey` and `sessionId` is not visible here.
 */
export declare function getOpenAIResponsesPromptCacheKey(options: OpenAIResponsesCacheOptions | undefined): string | undefined;
/**
 * Derive a routing session id from the caller's cache options. Only
 * `cacheRetention` and `sessionId` are read (`promptCacheKey` is excluded by
 * the parameter type).
 *
 * @returns The session id to use, or `undefined`.
 */
export declare function getOpenAIResponsesRoutingSessionId(options: Pick<OpenAIResponsesCacheOptions, "cacheRetention" | "sessionId"> | undefined): string | undefined;
/**
 * Derive the OpenRouter Responses session id from the caller's cache options.
 * Only `cacheRetention` and `sessionId` are read (`promptCacheKey` is excluded
 * by the parameter type).
 *
 * @returns The session id to use, or `undefined`.
 */
export declare function getOpenRouterResponsesSessionId(options: Pick<OpenAIResponsesCacheOptions, "cacheRetention" | "sessionId"> | undefined): string | undefined;
/**
 * Parse an optional string into a string-to-string map of Azure deployment
 * names. Always returns a `Map` (never `undefined`), even when `value` is
 * `undefined`.
 *
 * NOTE(review): the accepted string format and the key meaning (presumably
 * model id → deployment name) are not visible in this declaration.
 */
export declare function parseAzureDeploymentNameMap(value: string | undefined): Map<string, string>;
/** Create a new {@link OpenAIStrictToolsState} container. */
export declare function createOpenAIStrictToolsState(): OpenAIStrictToolsState;
/**
 * Reset a strict-tools state object. Returns `void`, so `state` is mutated in
 * place (presumably by emptying `disabledModelScopes` — confirm).
 */
export declare function clearOpenAIStrictToolsState(state: OpenAIStrictToolsState): void;
/**
 * Build the strict-tools scope for a model.
 *
 * @param model - Identity of the model.
 * @param resolvedBaseUrl - Base URL the request resolved to, passed separately
 *   from `model.baseUrl`; may be `undefined`.
 * @returns The {@link OpenAIStrictToolsScope} for this model/endpoint pair.
 */
export declare function getOpenAIStrictToolsScope(model: OpenAIModelIdentity, resolvedBaseUrl: string | undefined): OpenAIStrictToolsScope;
/**
 * Report whether strict tools have been disabled for the given scope.
 * Both arguments may be `undefined`; NOTE(review): presumably that yields
 * `false` — confirm against the implementation.
 */
export declare function isStrictToolsDisabledForScope(state: OpenAIStrictToolsState | undefined, scope: OpenAIStrictToolsScope | undefined): boolean;
/**
 * Record that strict tools are disabled for the given scope. Returns `void`,
 * so the effect is a mutation of `state`. Both arguments may be `undefined`;
 * NOTE(review): presumably a no-op in that case — confirm.
 */
export declare function disableStrictToolsForScope(state: OpenAIStrictToolsState | undefined, scope: OpenAIStrictToolsScope | undefined): void;
/**
 * Report whether the identified model is an Anthropic model served through
 * OpenRouter. NOTE(review): which identity fields drive the decision
 * (provider, id prefix, base URL) is not visible in this declaration.
 */
export declare function isOpenRouterAnthropicModel(model: OpenAIModelIdentity): boolean;
/**
 * Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
 * to a model id when no explicit variant is already present. A variant is considered
 * "already present" when `modelId` contains a colon after the last `/` separator —
 * which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
 * entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
 *
 * @param modelId - Model id, optionally already carrying a `:variant` suffix.
 * @param variant - Variant to append; may be `undefined`.
 * @returns The model id to put on the wire.
 */
export declare function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string;
/**
 * Transform a base model id into the id sent on the wire, according to the
 * compat `wireModelIdMode`.
 *
 * @param baseId - Model id before transformation.
 * @param mode - Value of `ResolvedOpenAISharedCompat["wireModelIdMode"]`.
 * @param openrouterVariant - Optional OpenRouter variant; NOTE(review):
 *   presumably forwarded to `applyOpenRouterRoutingVariant` for the relevant
 *   mode — confirm.
 */
export declare function applyWireModelIdTransform(baseId: string, mode: ResolvedOpenAISharedCompat["wireModelIdMode"], openrouterVariant?: string): string;
/**
 * A resolved output-token cap: which wire field to set and the value to send.
 * Returned by `resolveOpenAIOutputTokenParam`.
 */
export interface OpenAIOutputTokenParam {
    field: "max_tokens" | "max_completion_tokens" | "max_output_tokens";
    value: number;
}
/**
 * Inputs to `resolveOpenAIOutputTokenParam`. Every field except
 * `providerOutputClamp` is required, so callers must state each provider
 * exception explicitly.
 */
export interface ResolveOpenAIOutputTokenInput {
    /** Wire field the endpoint expects for the output cap. */
    field: OpenAIOutputTokenParam["field"];
    /** Caller-supplied output cap (model-defaulted by `stream.ts`, or null/undefined on direct provider calls). */
    maxTokens: number | null | undefined;
    /** Whether the caller explicitly set `maxTokens` (routing omission only applies when false). */
    maxTokensExplicit: boolean;
    /** Model output cap (`model.maxTokens`). */
    modelMaxTokens: number | null | undefined;
    /** Drop the field entirely — proxies with unknown upstream caps (Ollama via `model.omitMaxOutputTokens`). */
    omitMaxOutputTokens: boolean;
    /** The model sits behind OpenRouter (catalog default caps are omitted so each upstream self-caps). */
    isOpenRouterHost: boolean;
    /** Endpoint always needs a cap (Kimi-family TPM math); supplies the model default when the caller did not. */
    alwaysSendMaxTokens: boolean;
    /** Hard provider clamp; defaults to {@link OPENAI_MAX_OUTPUT_TOKENS}. */
    providerOutputClamp?: number;
}
/**
 * Resolve the single output-token wire parameter shared by Chat Completions
 * (`max_tokens`/`max_completion_tokens`) and the Responses family
 * (`max_output_tokens`). Centralizes the provider exceptions that previously
 * lived inline in both `buildParams`:
 *  - `alwaysSendMaxTokens`: Kimi-family endpoints derive TPM limits from the
 *    cap and require one on every call, so default from the model cap (or
 *    {@link OPENAI_MAX_OUTPUT_TOKENS}) when the caller omitted it.
 *  - OpenRouter routing omission: OpenRouter fans out to upstreams whose output
 *    caps differ from the catalog value, so a catalog default above the routed
 *    upstream's cap makes OpenRouter skip that upstream. Omit catalog defaults
 *    (explicit caller caps still win) so `provider.order`/`only` is honored.
 *  - model/provider clamp: never exceed `model.maxTokens` or the provider clamp
 *    (`OPENAI_MAX_OUTPUT_TOKENS`, raised for GLM-5.2 reasoning by the caller).
 *  - `omitMaxOutputTokens`: proxies (Ollama) with unknown upstream caps drop it.
 *
 * @param input - See {@link ResolveOpenAIOutputTokenInput}.
 * @returns The field/value pair to set on the request, or `undefined` when no
 *   output cap should be sent.
 */
export declare function resolveOpenAIOutputTokenParam(input: ResolveOpenAIOutputTokenInput): OpenAIOutputTokenParam | undefined;
/**
 * The slice of a request body that `applyOpenAIGatewayRouting` writes to:
 * OpenRouter's top-level `provider` field and the Vercel AI Gateway's
 * `providerOptions.gateway` (`only` / `order` lists).
 */
export interface OpenAIGatewayRoutingParams {
    provider?: OpenRouterRouting;
    providerOptions?: {
        gateway?: {
            only?: string[];
            order?: string[];
        };
    };
}
/**
 * Compat slice read by `applyOpenAIGatewayRouting`. `isOpenRouterHost` is
 * required; the Vercel fields are optional because Responses compat never
 * sets `isVercelGatewayHost` (per the doc on `applyOpenAIGatewayRouting`).
 */
export interface OpenAIGatewayRoutingCompat {
    isOpenRouterHost: boolean;
    openRouterRouting?: OpenRouterRouting;
    isVercelGatewayHost?: boolean;
    vercelGatewayRouting?: VercelGatewayRouting;
}
/**
 * Apply gateway routing preferences to the request body. OpenRouter routes via
 * the top-level `provider` field; the Vercel AI Gateway routes via
 * `providerOptions.gateway`. Both Chat Completions and Responses call this; the
 * Vercel branch is inert for Responses, whose resolved compat never sets
 * `isVercelGatewayHost`.
 *
 * @param params - Request body to mutate (the function returns `void`).
 * @param compat - Host flags and routing preferences to apply.
 */
export declare function applyOpenAIGatewayRouting(params: OpenAIGatewayRoutingParams, compat: OpenAIGatewayRoutingCompat): void;
/** Behavior switches for `applyOpenAIExtraBody`. */
export interface OpenAIExtraBodyOptions {
    /**
     * Fireworks rejects DeepSeek-style `thinking` toggles alongside OpenAI-style
     * `reasoning_effort`; drop `thinking` when the effort field carries the level.
     */
    dropThinkingWhenReasoningEffort?: boolean;
}
/**
 * Merge a compat/options `extraBody` blob into the request params. When
 * `dropThinkingWhenReasoningEffort` is set and `reasoning_effort` is present,
 * delete the conflicting `thinking` toggle (Fireworks rejects both together).
 *
 * @param params - Request params to mutate (the function returns `void`).
 * @param extraBody - Extra fields to merge; may be `undefined`.
 * @param options - Optional switches; see {@link OpenAIExtraBodyOptions}.
 */
export declare function applyOpenAIExtraBody<P extends object>(params: P, extraBody: Record<string, unknown> | undefined, options?: OpenAIExtraBodyOptions): void;
/**
 * Chat Completions streaming request body shaped by the OpenAI-family providers.
 * Extends the vendored SDK params with the compat dialect fields pi-ai emits
 * (binary `thinking`, Qwen `enable_thinking`/`chat_template_kwargs`, nested
 * `reasoning`, gateway `provider`/`providerOptions`, sampling extras). Lives in
 * the shared module beside the request-shaping helpers that mutate it.
 *
 * `reasoning_effort` and `service_tier` are removed from the SDK type via
 * `Omit` and redeclared below with this module's own types.
 */
export type OpenAICompletionsParams = Omit<ChatCompletionCreateParamsStreaming, "reasoning_effort" | "service_tier"> & {
    top_k?: number;
    min_p?: number;
    repetition_penalty?: number;
    thinking?: {
        type: "enabled" | "disabled";
        keep?: "all";
    };
    enable_thinking?: boolean;
    preserve_thinking?: boolean;
    chat_template_kwargs?: {
        enable_thinking?: boolean;
        preserve_thinking?: boolean;
    };
    // Either an effort-carrying object or an explicit `{ enabled: false }`.
    reasoning?: {
        effort?: string;
    } | {
        enabled: false;
    };
    // Redeclared as a free-form string (or null) instead of the SDK's type.
    reasoning_effort?: string | null;
    // Redeclared with the project's `ServiceTier` type.
    service_tier?: ServiceTier;
    tool_stream?: boolean;
    provider?: OpenAICompat["openRouterRouting"];
    providerOptions?: {
        gateway?: {
            only?: string[];
            order?: string[];
        };
    };
};
/**
 * Reasoning-relevant slice of caller options the Chat Completions dialect dispatch reads.
 * Consumed by `applyChatCompletionsReasoningParams`.
 */
export interface ChatCompletionsReasoningOptions {
    /** Requested reasoning effort level, if any. */
    reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
    /** Caller flag asking for reasoning to be disabled. */
    disableReasoning?: boolean;
}
/** Which OpenAI-style endpoint a compat policy is resolved for. */
export type OpenAICompatEndpoint = "chat-completions" | "responses";
/**
 * Why reasoning ended up disabled in a resolved policy; surfaced as
 * `OpenAICompatPolicy["reasoning"]["disableReason"]`.
 * NOTE(review): the precise difference between `"forced-tool-choice"` and
 * `"tool-choice"` is not visible in this declaration.
 */
export type OpenAIReasoningDisableReason = "caller" | "forced-tool-choice" | "tool-choice" | "not-requested";
/**
 * Compat shape accepted by the policy resolver: the shared compat fields are
 * required, while the Chat-Completions-specific and Responses-specific fields
 * are all optional (`Partial`), so one type serves both endpoints.
 */
export type OpenAICompatPolicyCompat = ResolvedOpenAISharedCompat & Partial<ResolvedOpenAICompat> & Partial<ResolvedOpenAIResponsesCompat>;
/**
 * Options for `resolveOpenAICompatPolicy`. Only `endpoint` is required.
 *
 * NOTE(review): the optional booleans mirror fields of the resolved
 * {@link OpenAICompatPolicy} and presumably act as caller overrides of the
 * compat-derived defaults — confirm against the implementation.
 */
export interface ResolveOpenAICompatPolicyOptions {
    endpoint: OpenAICompatEndpoint;
    compat?: OpenAICompatPolicyCompat;
    /** Requested reasoning effort as a free-form string. */
    reasoning?: string;
    disableReasoning?: boolean;
    /** Tool-choice value as supplied by the caller; shape is not constrained here. */
    toolChoice?: unknown;
    strictResponsesPairing?: boolean;
    includeEncryptedReasoning?: boolean;
    filterReasoningHistory?: boolean;
    omitReasoningEffort?: boolean;
}
/**
 * Fully resolved compatibility policy for one request, returned by
 * `resolveOpenAICompatPolicy` and consumed by
 * `applyChatCompletionsCompatPolicy` / `applyResponsesCompatPolicy`.
 * Decisions are grouped into four sections: `reasoning`, `tools`, `messages`
 * and `stream`.
 */
export interface OpenAICompatPolicy {
    endpoint: OpenAICompatEndpoint;
    compat: OpenAICompatPolicyCompat;
    /** Reasoning/thinking decisions for the request and for history replay. */
    reasoning: {
        modelSupported: boolean;
        supportsParams: boolean;
        // NOTE(review): presumably `requestedEffort` is the caller's value and
        // `wireEffort` the value after provider mapping — confirm.
        requestedEffort?: string;
        wireEffort?: string;
        enabled: boolean;
        disabled: boolean;
        disableReason?: OpenAIReasoningDisableReason;
        dialect: ResolvedOpenAISharedCompat["thinkingFormat"];
        disableMode: OpenAIReasoningDisableMode;
        omitReasoningEffort: boolean;
        includeEncryptedReasoning: boolean;
        filterReasoningHistory: boolean;
        requiresReasoningContentForToolCalls: boolean;
        requiresReasoningContentForAllAssistantTurns: boolean;
        allowsSyntheticReasoningContentForToolCalls: boolean;
        reasoningContentField?: OpenAICompat["reasoningContentField"];
        requiresThinkingAsText: boolean;
    };
    /** Tool-call pairing and id-format decisions. */
    tools: {
        strictResponsesPairing: boolean;
        toolCallIdKind: "default" | "openai-40" | "mistral-9-alnum";
    };
    /** System/developer role handling for outgoing messages. */
    messages: {
        systemRole: "system" | "developer";
        supportsDeveloperRole: boolean;
        supportsMultipleSystemMessages: boolean;
    };
    /** Stream-decoding quirks for the target endpoint. */
    stream: {
        stripSpecialTokens: "deepseek" | false;
        markupHealingPattern?: OpenAIStreamMarkupHealingPattern;
        reasoningDeltasMayBeCumulative: boolean;
        emptyLengthFinishIsContextError: boolean;
    };
}
/**
 * Resolve the compatibility policy for a model and endpoint.
 *
 * @param model - Target model, generic over its API kind.
 * @param options - Endpoint plus optional compat and caller inputs; see
 *   {@link ResolveOpenAICompatPolicyOptions}.
 * @returns The resolved {@link OpenAICompatPolicy}.
 */
export declare function resolveOpenAICompatPolicy<TApi extends Api>(model: Model<TApi>, options: ResolveOpenAICompatPolicyOptions): OpenAICompatPolicy;
/**
 * Apply a resolved compat policy to Chat Completions request params. Returns
 * `void`, so `params` is mutated in place.
 * NOTE(review): which policy sections are written to `params` is not visible
 * in this declaration.
 */
export declare function applyChatCompletionsCompatPolicy(params: OpenAICompletionsParams, policy: OpenAICompatPolicy): void;
/**
 * Apply reasoning-related fields to Chat Completions request params for the
 * given model and compat. Returns `void`, so `params` is mutated in place.
 *
 * @param params - Request params to mutate.
 * @param model - Target `openai-completions` model.
 * @param compat - Resolved Chat Completions compat.
 * @param options - Caller reasoning options plus an optional `toolChoice`;
 *   may be `undefined`.
 */
export declare function applyChatCompletionsReasoningParams(params: OpenAICompletionsParams, model: Model<"openai-completions">, compat: ResolvedOpenAICompat, options: (ChatCompletionsReasoningOptions & {
    toolChoice?: unknown;
}) | undefined): void;
/**
 * Write the reasoning-disabled form for the compat's dialect onto `params`.
 * Returns `void`, so `params` is mutated in place.
 * NOTE(review): the per-dialect fields that get set are not visible in this
 * declaration.
 */
export declare function disableChatCompletionsReasoningForDialect(params: OpenAICompletionsParams, compat: ResolvedOpenAICompat): void;
/**
 * Output-token clamp for the Z.AI/GLM-5.2 reasoning dialect: these hosts accept
 * the full model window on reasoning turns, so clamp to the model cap. Returns
 * `undefined` for every other model, leaving {@link resolveOpenAIOutputTokenParam}
 * on its default `OPENAI_MAX_OUTPUT_TOKENS` clamp.
 *
 * @param model - Target `openai-completions` model.
 * @param compat - Resolved Chat Completions compat for that model.
 * @returns The clamp to pass as `providerOutputClamp`, or `undefined`.
 */
export declare function resolveZaiReasoningOutputClamp(model: Model<"openai-completions">, compat: ResolvedOpenAICompat): number | undefined;
/**
 * Enable `tool_stream` for Z.AI/GLM-5.2 reasoning models when tools are present
 * (GLM-5.2 streams tool-call arguments incrementally and needs the flag to do so).
 *
 * @param params - Request params to mutate (the function returns `void`).
 * @param model - Target `openai-completions` model.
 * @param compat - Resolved Chat Completions compat for that model.
 */
export declare function applyChatCompletionsToolStream(params: OpenAICompletionsParams, model: Model<"openai-completions">, compat: ResolvedOpenAICompat): void;
/**
 * Classify a failure as a "compiled grammar too large" strict-tools error.
 *
 * @param error - The thrown value (unvalidated, hence `unknown`).
 * @param capturedErrorResponse - Captured HTTP error response, if any.
 * NOTE(review): the matching rules (status code, message text) are not
 * visible in this declaration.
 */
export declare function isCompiledGrammarTooLargeStrictError(error: unknown, capturedErrorResponse: CapturedHttpErrorResponse | undefined): boolean;
/**
 * Decide whether a failed request should be retried with strict tools turned
 * off.
 *
 * @param error - The thrown value (unvalidated, hence `unknown`).
 * @param capturedErrorResponse - Captured HTTP error response, if any.
 * @param strictToolsApplied - Whether the failed request used strict tools.
 * @param tools - Tools sent with the request, if any.
 * NOTE(review): the exact decision rules are not visible in this declaration.
 */
export declare function shouldRetryWithoutStrictTools(error: unknown, capturedErrorResponse: CapturedHttpErrorResponse | undefined, strictToolsApplied: boolean, tools: Tool[] | undefined): boolean;
/**
 * Read-only set of Responses stream event `type` strings treated as progress
 * events. NOTE(review): the member list lives in the implementation.
 */
export declare const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string>;
/**
 * Report whether an arbitrary value is a Responses progress event. Accepts
 * `unknown`, so it is safe to call on unvalidated stream payloads.
 * NOTE(review): presumably checks the event's `type` against
 * `OPENAI_RESPONSES_PROGRESS_EVENT_TYPES` — confirm.
 */
export declare function isOpenAIResponsesProgressEvent(event: unknown): boolean;
/**
 * Encode a text-block id and optional phase into a V1 text-signature string.
 * Counterpart of `parseTextSignature`.
 * NOTE(review): the serialized format is not visible in this declaration.
 */
export declare function encodeTextSignatureV1(id: string, phase?: TextSignatureV1["phase"]): string;
/**
 * Parse a text-signature string back into its id and optional phase.
 *
 * @param signature - Signature string; may be `undefined`.
 * @returns The decoded fields, or `undefined` (presumably when the signature
 *   is absent or unparseable — confirm against the implementation).
 */
export declare function parseTextSignature(signature: string | undefined): {
    id: string;
    phase?: TextSignatureV1["phase"];
} | undefined;
/**
 * Combine a Responses `call_id` and an optional item id into a single
 * tool-call id string.
 * NOTE(review): the combined format, and the result when `itemId` is
 * `null`/`undefined`, are not visible in this declaration.
 */
export declare function encodeResponsesToolCallId(callId: string, itemId: string | null | undefined): string;
/**
 * Normalize a tool-call id for use during message transformation.
 *
 * @param id - Tool-call id to normalize.
 * @param model - Optional target model.
 * @param source - Optional assistant message the id originated from.
 * NOTE(review): how `model` and `source` influence the result is not visible
 * in this declaration.
 */
export declare function normalizeResponsesToolCallIdForTransform(id: string, model?: Model<Api>, source?: AssistantMessage): string;
/**
 * Collect the set of `call_id` values found in a Responses input array.
 * NOTE(review): which item types contribute ids is not visible in this
 * declaration.
 */
export declare function collectKnownCallIds(messages: ResponseInput): Set<string>;
/**
 * Scan replay items for call_ids that were originally custom tool calls.
 *
 * @param messages - Responses input array to scan.
 * @returns The set of matching `call_id` values.
 */
export declare function collectCustomCallIds(messages: ResponseInput): Set<string>;
/**
 * Convert orphan `function_call_output` / `custom_tool_call_output` items —
 * those whose `call_id` has no matching preceding `function_call` /
 * `custom_tool_call` in the same input — into assistant text notes.
 *
 * The Responses API rejects unpaired outputs with
 * `400 No tool call found for function call output with call_id …`. Orphans
 * sneak in through two paths today:
 *
 * - A previous turn's `providerPayload` snapshot replaces the input array via
 *   the `dt: false` splice (see {@link convertConversationMessages}), wiping
 *   the matching `function_call` while leaving the matching
 *   `function_call_output` queued in a later `toolResult`.
 * - A locally-rejected tool call (argument-validation failure, hook reject,
 *   aborted turn before the call streamed) produces a tool result without a
 *   `function_call` ever landing in any persisted provider payload.
 *
 * Dropping the result loses information the model needs to recover; sending
 * it as-is 400s the request. Folding it into an assistant `message` preserves
 * the payload (call_id + truncated output) while staying within the Responses
 * input grammar. Matches the behavior of {@link transformRequestBody} in the
 * codex provider — issue #1351 / regression of #472.
 *
 * @param input - Responses input array to repair.
 * @returns The repaired input. NOTE(review): whether this is a new array or
 *   `input` itself is not visible in this declaration.
 */
export declare function repairOrphanResponsesToolOutputs(input: ResponseInput): ResponseInput;
/**
 * Synthesize a placeholder `function_call_output` / `custom_tool_call_output`
 * for every `function_call` / `custom_tool_call` whose `call_id` has no matching
 * output later in the same input. The Responses API rejects an unpaired call
 * with `400 No tool output found for function call …`.
 *
 * Orphan calls surface when the user branches/navigates the session tree to a
 * node that ends on a tool call (the tool-result child is excluded from the
 * reconstructed history) or when a turn is aborted/crashes after the call
 * streamed but before its result persisted. Dropping the call would erase the
 * assistant's action; a placeholder output keeps the call visible so the model
 * can recover (e.g. re-issue the call). Symmetric to
 * {@link repairOrphanResponsesToolOutputs}.
 *
 * @param input - Responses input array to repair.
 * @returns The repaired input. NOTE(review): whether this is a new array or
 *   `input` itself is not visible in this declaration.
 */
export declare function repairOrphanResponsesToolCalls(input: ResponseInput): ResponseInput;
/**
 * Convert message content (a plain string or an array of text/image parts)
 * into Responses input content parts.
 *
 * @param content - Source content.
 * @param supportsImages - Whether the target model accepts image input.
 * @param supportsImageDetailOriginal - Whether the target accepts the
 *   "original" image detail level.
 * @returns The converted parts, or `undefined` (presumably when nothing
 *   sendable remains — confirm against the implementation).
 */
export declare function convertResponsesInputContent(content: string | Array<TextContent | ImageContent>, supportsImages: boolean, supportsImageDetailOriginal: boolean): ResponseInputContent[] | undefined;
/**
 * Options for `buildResponsesInput`. `model`, `context`,
 * `strictResponsesPairing` and `supportsImageDetailOriginal` are required;
 * the rest are optional switches.
 */
export interface BuildResponsesInputOptions<TApi extends Api> {
    model: Model<TApi>;
    context: Context;
    strictResponsesPairing: boolean;
    supportsImageDetailOriginal: boolean;
    /** Role used for the system prompt item. */
    systemRole?: "system" | "developer";
    /** Native-history handling: whether to replay it and whether to filter reasoning from it. */
    nativeHistory?: {
        replay: boolean;
        filterReasoning: boolean;
    };
    includeThinkingSignatures?: boolean;
    developerStringContent?: boolean;
    // NOTE(review): presumably toggles `repairOrphanResponsesToolOutputs` on
    // the built input — confirm.
    repairOrphanOutputs?: boolean;
}
/**
 * Build the Responses API `input` array for a request from the model and
 * conversation context.
 *
 * @param options - See {@link BuildResponsesInputOptions}.
 * @returns The assembled {@link ResponseInput}.
 */
export declare function buildResponsesInput<TApi extends Api>(options: BuildResponsesInputOptions<TApi>): ResponseInput;
/**
 * Convert one assistant message into Responses input items.
 *
 * @param assistantMsg - Assistant message to convert.
 * @param model - Target model.
 * @param msgIndex - Index of the message in the conversation.
 * @param knownCallIds - Mutable set of call ids; NOTE(review): presumably
 *   extended with the ids of emitted tool calls — confirm.
 * @param includeThinkingSignatures - Optional switch for thinking signatures.
 * @param customCallIds - Optional set of ids known to be custom tool calls.
 * @returns The input items produced for this message.
 */
export declare function convertResponsesAssistantMessage<TApi extends Api>(assistantMsg: AssistantMessage, model: Model<TApi>, msgIndex: number, knownCallIds: Set<string>, includeThinkingSignatures?: boolean, customCallIds?: Set<string>): ResponseInput;
/**
 * Append the Responses input items for one tool result onto `messages`.
 * Returns `void`, so `messages` is mutated in place; the call-id sets are
 * read-only.
 *
 * @param messages - Input array to append to.
 * @param toolResult - Tool result message to convert.
 * @param model - Target model.
 * @param strictResponsesPairing - Whether strict call/output pairing applies.
 * @param supportsImageDetailOriginal - Whether the target accepts the
 *   "original" image detail level.
 * @param knownCallIds - Call ids already present in the input.
 * @param customCallIds - Optional ids known to be custom tool calls.
 */
export declare function appendResponsesToolResultMessages<TApi extends Api>(messages: ResponseInput, toolResult: ToolResultMessage, model: Model<TApi>, strictResponsesPairing: boolean, supportsImageDetailOriginal: boolean, knownCallIds: ReadonlySet<string>, customCallIds?: ReadonlySet<string>): void;
/**
 * Per-block accumulation helpers shared by the two Responses decode loops —
 * {@link processResponsesStream} (generic Responses) and the Codex stream
 * handler in `openai-codex-responses.ts`. Each endpoint keeps its own
 * item-routing, terminal handling, and transport bookkeeping; these own only
 * the leaf mutations on an already-resolved open block, so the
 * append/parse/finalize logic lives in exactly one place. The caller passes the
 * `contentIndex` its router resolved (generic uses `output.content.indexOf`;
 * Codex uses the open item's recorded index) so the emitted stream events match
 * each decoder's existing behavior byte-for-byte.
 */
/**
 * A `ToolCall` block that is still streaming: it carries the accumulated
 * partial JSON text (required) and, optionally, the length at which that text
 * was last parsed, both under symbol keys. Module-private; used as the block
 * parameter of the tool-call accumulate/finalize helpers.
 */
type ResponsesToolCallBlock = ToolCall & {
    [kStreamingPartialJson]: string;
    [kStreamingLastParseLen]?: number;
};
/**
 * Append a summary part to a reasoning item. Returns `void`, so `item` is
 * mutated in place (presumably by pushing onto `item.summary` — confirm).
 */
export declare function appendReasoningSummaryPart(item: ResponseReasoningItem, part: ResponseReasoningItem["summary"][number]): void;
/**
 * Apply a reasoning-summary text delta to an open thinking block.
 *
 * @param item - Wire reasoning item being accumulated.
 * @param block - Thinking content block the delta belongs to.
 * @param delta - Text fragment received from the stream.
 * @param stream - Event stream; NOTE(review): presumably receives a
 *   thinking-delta event — confirm.
 * @param output - Assistant message being accumulated.
 * @param contentIndex - Index of `block` as resolved by the caller's router.
 */
export declare function appendReasoningSummaryTextDelta(item: ResponseReasoningItem, block: ThinkingContent, delta: string, stream: AssistantMessageEventStream, output: AssistantMessage, contentIndex: number): void;
/**
 * Handle the end of one reasoning-summary part for an open thinking block.
 * Takes the same item/block/stream/output/index context as
 * `appendReasoningSummaryTextDelta`, without a delta.
 * NOTE(review): the exact mutation (e.g. a separator appended between parts)
 * is not visible in this declaration.
 */
export declare function appendReasoningSummaryPartDone(item: ResponseReasoningItem, block: ThinkingContent, stream: AssistantMessageEventStream, output: AssistantMessage, contentIndex: number): void;
/**
 * Append a content part to an output message item. `part` may be `undefined`
 * (presumably ignored in that case — confirm). Returns `void`, so `item` is
 * mutated in place.
 */
export declare function appendMessageContentPart(item: ResponseOutputMessage, part: ResponseContentPartAddedEvent["part"] | undefined): void;
/**
 * Apply a message text delta to an open text block.
 *
 * @param item - Wire output message being accumulated.
 * @param block - Text content block the delta belongs to.
 * @param delta - Text fragment received from the stream.
 * @param stream - Event stream for the assistant message.
 * @param output - Assistant message being accumulated.
 * @param contentIndex - Index of `block` as resolved by the caller's router.
 * @param partType - Whether the delta belongs to an `output_text` or a
 *   `refusal` part.
 */
export declare function appendMessageTextDelta(item: ResponseOutputMessage, block: TextContent, delta: string, stream: AssistantMessageEventStream, output: AssistantMessage, contentIndex: number, partType: "output_text" | "refusal"): void;
/**
 * Accumulate a function-call arguments delta onto a streaming tool-call block.
 *
 * @param block - Open tool-call block carrying the partial-JSON buffer.
 * @param delta - Arguments fragment received from the stream.
 * @param stream - Event stream for the assistant message.
 * @param output - Assistant message being accumulated.
 * @param contentIndex - Index of `block` as resolved by the caller's router.
 */
export declare function accumulateToolCallArgumentsDelta(block: ResponsesToolCallBlock, delta: string, stream: AssistantMessageEventStream, output: AssistantMessage, contentIndex: number): void;
/**
 * Finalize streamed function-call arguments from the authoritative `.done`
 * payload. The caller owns the `argumentsDone` flag (generic Responses sets it;
 * Codex's block shape has no such field), so this only rewrites `arguments` and
 * drops the transient accumulation fields.
 *
 * @param block - Tool-call block to finalize (mutated in place).
 * @param args - Complete arguments string from the `.done` event.
 */
export declare function finalizeToolCallArgumentsDone(block: ResponsesToolCallBlock, args: string): void;
/**
 * Accumulate a custom-tool-call input delta onto a streaming tool-call block.
 * Takes the same parameters as `accumulateToolCallArgumentsDelta`.
 * NOTE(review): how custom-tool input differs from function-call arguments
 * during accumulation is not visible in this declaration.
 */
export declare function accumulateCustomToolCallInputDelta(block: ResponsesToolCallBlock, delta: string, stream: AssistantMessageEventStream, output: AssistantMessage, contentIndex: number): void;
/**
 * Finalize a streamed custom tool call from its complete input string.
 * Returns `void`, so `block` is mutated in place.
 *
 * @param block - Tool-call block to finalize.
 * @param input - Complete custom-tool input.
 */
export declare function finalizeCustomToolCallInputDone(block: ResponsesToolCallBlock, input: string): void;
/** Optional callbacks and billing hints for `processResponsesStream`. */
export interface ProcessResponsesStreamOptions {
    /** NOTE(review): presumably invoked once, when the first output token arrives — confirm. */
    onFirstToken?: () => void;
    /** Receives each output item; by its name, called when the item is done. */
    onOutputItemDone?: (item: ResponseOutputItem) => void;
    /**
     * Called when a terminal `response.completed` or `response.incomplete` event
     * is successfully processed. Only invoked on the successful-completion path;
     * thrown failure (`response.failed`) and cancellation paths never call this.
     * Used by callers to detect premature stream closure (i.e. the stream ended
     * without a recognized terminal event).
     */
    onCompleted?: () => void;
    /**
     * Caller-requested service tier, used to bill the served tier when the
     * response omits the `service_tier` echo. Only applied for `provider: "openai"`.
     */
    requestServiceTier?: ServiceTier;
}
/**
 * Decode a generic Responses event stream into an assistant message.
 *
 * @param openaiStream - Async iterable of Responses stream events.
 * @param output - Assistant message accumulated into (mutated in place).
 * @param stream - Event stream that receives the decoded events.
 * @param model - Target model.
 * @param options - Optional callbacks and billing hints; see
 *   {@link ProcessResponsesStreamOptions}.
 * @returns A promise that resolves once the stream has been consumed.
 */
export declare function processResponsesStream<TApi extends Api>(openaiStream: AsyncIterable<ResponseStreamEvent>, output: AssistantMessage, stream: AssistantMessageEventStream, model: Model<TApi>, options?: ProcessResponsesStreamOptions): Promise<void>;
/**
 * Map a Responses wire status to the internal {@link StopReason}. Accepts
 * `undefined` and always returns a stop reason.
 * NOTE(review): the status-to-reason table is not visible in this declaration.
 */
export declare function mapOpenAIResponsesStopReason(status: ResponseStatus | undefined): StopReason;
/**
 * Finalize any streamed toolCall block whose `output_item.done` never arrived
 * (lossy proxy, or a terminal event that raced the per-item done): parse the
 * accumulated `partialJson` into authoritative arguments and strip the transient
 * streaming fields so they never persist. Shared by the chat-Responses decoder
 * and the Codex decoder. Closed blocks already cleared these fields, so walking
 * the full content list leaves them untouched.
 *
 * @param output - Assistant message whose content blocks are finalized in place.
 */
export declare function finalizePendingResponsesToolCalls(output: AssistantMessage): void;
/**
 * Apply the Responses terminal stop-reason invariants shared by the chat-Responses
 * and Codex decoders: a turn that produced tool calls becomes `toolUse`, and a
 * Codex-lineage `end_turn: false` marker pauses the turn so the agent loop
 * re-samples instead of ending. Callers set `output.stopReason` from the wire
 * status first via {@link mapOpenAIResponsesStopReason}.
 *
 * @param output - Assistant message whose `stopReason` may be rewritten.
 * @param endTurn - The `end_turn` marker from the wire, if present.
 */
export declare function promoteResponsesToolUseStopReason(output: AssistantMessage, endTurn: boolean | undefined): void;
/**
 * Initial empty `AssistantMessage` that streaming providers accumulate into.
 *
 * @param api - API kind to stamp on the message.
 * @param provider - Provider name to stamp on the message.
 * @param modelId - Model id to stamp on the message.
 */
export declare function createInitialResponsesAssistantMessage(api: Api, provider: string, modelId: string): AssistantMessage;
/**
 * Extension fields we add on top of `ResponseCreateParamsStreaming` across the Responses-family providers.
 * All are optional numeric sampling knobs.
 */
export type ResponsesSamplingParamsExtras = {
    top_p?: number;
    top_k?: number;
    min_p?: number;
    presence_penalty?: number;
    repetition_penalty?: number;
};
/**
 * Responses request params plus the sampling extras; the constraint on the
 * `params` argument of `applyCommonResponsesSamplingParams`. Module-private.
 */
type CommonResponsesParams = ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;
/**
 * The sampling-related subset of `StreamOptions` (temperature, top-p/k, min-p,
 * penalties, `maxTokens`) plus an optional service tier. Module-private; the
 * `options` type of `applyCommonResponsesSamplingParams`.
 */
type CommonSamplingOptions = Pick<StreamOptions, "temperature" | "topP" | "topK" | "minP" | "presencePenalty" | "repetitionPenalty" | "maxTokens"> & {
    serviceTier?: ServiceTier;
};
/**
 * Apply the common `StreamOptions` → Responses sampling-parameter mapping (max output tokens,
 * temperature, top-p/k, min-p, presence/repetition penalties, service tier). Mutates `params`.
 *
 * `max_output_tokens` is suppressed when {@link Model.omitMaxOutputTokens} is `true`, so
 * proxies (notably Ollama) that forward to upstream APIs with an unknown output-token cap
 * can let the upstream apply its own default instead of 400-ing on `maxTokens` values that
 * reflect the model's context window rather than the upstream output limit.
 *
 * @param params - Responses request params to mutate.
 * @param options - Caller sampling options; may be `undefined`.
 * @param model - Only `provider`, `omitMaxOutputTokens` and `maxTokens` are read.
 */
export declare function applyCommonResponsesSamplingParams<P extends CommonResponsesParams>(params: P, options: CommonSamplingOptions | undefined, model: Pick<Model, "provider" | "omitMaxOutputTokens" | "maxTokens">): void;
/**
 * Caller reasoning options accepted by `applyResponsesReasoningParams`.
 * Module-private; every field is optional.
 */
type ReasoningOptions = {
    /** Requested reasoning effort as a free-form string. */
    reasoning?: string;
    /** Requested reasoning-summary mode; `null` is an allowed explicit value. */
    reasoningSummary?: "auto" | "detailed" | "concise" | null;
    disableReasoning?: boolean;
    /** Tool-choice value as supplied by the caller; shape is not constrained here. */
    toolChoice?: unknown;
};
/** Optional inputs for `applyResponsesCompatPolicy`. */
export interface ApplyResponsesCompatPolicyOptions {
    /** Requested reasoning-summary mode; `null` is an allowed explicit value. */
    reasoningSummary?: "auto" | "detailed" | "concise" | null;
    /** Maps an effort string to the value to use in its place. */
    mapEffort?: (effort: string) => string;
}
/**
 * Apply a resolved compat policy to Responses request params and input items.
 *
 * @param params - Responses request params.
 * @param messages - Responses input items for the request.
 * @param policy - Resolved policy from `resolveOpenAICompatPolicy`.
 * @param options - Optional summary mode and effort mapper; may be `undefined`.
 * @returns A number. NOTE(review): its meaning (e.g. a count of filtered or
 *   modified items) is not visible in this declaration — confirm before
 *   relying on it.
 */
export declare function applyResponsesCompatPolicy<P extends ResponseCreateParamsStreaming>(params: P, messages: ResponseInput, policy: OpenAICompatPolicy, options: ApplyResponsesCompatPolicyOptions | undefined): number;
/**
 * Apply reasoning-related Responses parameters. Default behavior comes from
 * catalog compat; include/omit arguments are explicit adapter-wrapper overrides.
 *
 * @param params - Responses request params.
 * @param model - Target model of one of the three Responses API kinds.
 * @param options - Caller reasoning options; may be `undefined`.
 * @param messages - Responses input items for the request.
 * @param mapEffort - Optional mapper from an effort string to its replacement.
 * @param includeEncryptedReasoning - Explicit override; omit to use compat.
 * @param omitReasoningEffort - Explicit override; omit to use compat.
 * @returns A number. NOTE(review): its meaning is not visible in this
 *   declaration — confirm before relying on it.
 */
export declare function applyResponsesReasoningParams<P extends ResponseCreateParamsStreaming>(params: P, model: Model<"openai-responses" | "azure-openai-responses" | "openai-codex-responses">, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string, includeEncryptedReasoning?: boolean, omitReasoningEffort?: boolean): number;
/**
 * Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`.
 *
 * Every counter in the accepted payload is optional and nullable, and the
 * payload itself may be `null`/`undefined`, so partial usage reports from
 * proxies type-check.
 *
 * @param output - Assistant message whose `usage` is written.
 * @param usage - Wire usage payload, if any.
 */
export declare function populateResponsesUsageFromResponse(output: AssistantMessage, usage: {
    input_tokens?: number | null;
    output_tokens?: number | null;
    total_tokens?: number | null;
    prompt_cache_hit_tokens?: number | null;
    prompt_cache_miss_tokens?: number | null;
    input_tokens_details?: {
        cached_tokens?: number | null;
        cache_write_tokens?: number | null;
        orchestration_input_tokens?: number | null;
        orchestration_input_cached_tokens?: number | null;
    } | null;
    output_tokens_details?: {
        reasoning_tokens?: number | null;
        orchestration_output_tokens?: number | null;
    } | null;
} | null | undefined): void;
/**
 * Strict-prefix delta for stateful `previous_response_id` chaining (used by the
 * platform Responses provider and the Codex provider on both transports):
 * returns the input items the current request appends beyond the previous
 * request's input plus the previous response's output items, or null when the
 * request options differ or history mutated (the chain must break). Per-turn
 * `client_metadata` (e.g. rotating turn ids) is excluded from the option
 * comparison; codex-rs excludes it from the same check.
 *
 * @param previous - Previous request (only `input` is typed here); may be
 *   `undefined`.
 * @param previousResponseItems - Output items of the previous response; may
 *   be `undefined`.
 * @param current - Current request whose `input` is compared against the
 *   previous chain.
 * @returns The appended items, or `null` when chaining is not possible.
 */
export declare function buildResponsesDeltaInput<TItem extends ResponseInputItem | InputItem>(previous: {
    input?: TItem[];
} | undefined, previousResponseItems: readonly TItem[] | undefined, current: {
    input?: TItem[];
}): TItem[] | null;
// Empty export list: exports nothing by itself. The file is already a module
// through its other import/export statements.
export {};