/**
 * (C) Copyright IBM Corp. 2025-2026.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
import type { Stream } from "../lib/common.mjs";
import type { CreateChatCompletionsParams } from "./types/chat/request.mjs";
import type { ChatsResponse } from "./types/chat/response.mjs";
import type { ObjectStreamed } from "../types/common.mjs";
import type { EmbeddingResponse } from "./types/embeddings/response.mjs";
import type { CreateEmbeddingsParams } from "./types/embeddings/request.mjs";
import type { CreateCompletionsParams } from "./types/text_completions/request.mjs";
import type { CompletionsResponse } from "./types/text_completions/response.mjs";
import type { APIBaseService } from "../base/base.mjs";
import type { Response } from "../base/types/base.mjs";
/**
 * `ChatsResponse` wrapped in `ObjectStreamed`. Used as the item type of the object stream in
 * `ChatCompletionResult`.
 */
export type ChatObjectStream = ObjectStreamed<ChatsResponse>;
/**
 * `CompletionsResponse` wrapped in `ObjectStreamed`. Used as the item type of the object stream in
 * `CompletionResult`.
 */
export type TextCompletionStream = ObjectStreamed<CompletionsResponse>;
/**
 * Abstract base class shared by the completion services declared in this file (`ChatCompletions`,
 * `EmbeddingCompletions` and `GenerateTextCompletions`).
 */
export declare abstract class Completions {
    /**
     * Service client available to subclasses. Presumably the `gateway` passed to the constructor;
     * the implementation is not visible from this declaration, so confirm there.
     */
    protected client: APIBaseService;
    /**
     * Constructor for Completions class.
     *
     * @param {APIBaseService} gateway - The APIBaseService instance.
     */
    constructor(gateway: APIBaseService);
    /**
     * Entry point that every concrete service must implement. Each subclass in this file redeclares
     * it with its own, narrower parameter and result types.
     *
     * @param {Object} params - The parameters to send to the service.
     * @returns {Promise<Object>} Resolves with the service-specific result.
     */
    abstract create(params: Record<string, any>): Promise<Record<string, any>>;
}
/**
 * Result type of `ChatCompletions.create`, selected by the `stream` (`S`) and `returnObject` (`R`)
 * type arguments:
 *
 * - `S` is `true` and `R` is `false`: `Stream<string>`.
 * - `S` is `true` and `R` is `true` or `undefined`: `Stream<ChatObjectStream>`.
 * - `S` is `false` or `undefined`: `Response<ChatsResponse>`, regardless of `R`.
 *
 * Both conditionals test a bare type parameter and are therefore distributive: a non-literal
 * `boolean` argument yields the union of the matching branches.
 */
export type ChatCompletionResult<S extends boolean | undefined, R extends boolean | undefined> = S extends true ? R extends false ? Stream<string> : Stream<ChatObjectStream> : Response<ChatsResponse>;
/**
 * Result type of `GenerateTextCompletions.create`, selected by the `stream` (`S`) and
 * `returnObject` (`R`) type arguments:
 *
 * - `S` is `true` and `R` is `false`: `Stream<string>`.
 * - `S` is `true` and `R` is `true` or `undefined`: `Stream<TextCompletionStream>`.
 * - `S` is `false` or `undefined`: `Response<CompletionsResponse>`, regardless of `R`.
 *
 * Both conditionals test a bare type parameter and are therefore distributive: a non-literal
 * `boolean` argument yields the union of the matching branches.
 */
export type CompletionResult<S extends boolean | undefined, R extends boolean | undefined> = S extends true ? R extends false ? Stream<string> : Stream<TextCompletionStream> : Response<CompletionsResponse>;
/** Class for handling chat completion requests. */
export declare class ChatCompletions extends Completions {
    /**
     * Create Chat Completions.
     *
     * Generate a chat completion based on the provided messages and parameters using the provided
     * model.
     *
     * @template S - Type of `params.stream`. Per `ChatCompletionResult`, the result is a `Stream`
     *   when it is `true` and a `Response` otherwise. Defaults to `false`.
     * @template R - Type of `params.returnObject`. Per `ChatCompletionResult`, it only matters when
     *   `S` is `true`: `false` selects `Stream<string>`, anything else selects
     *   `Stream<ChatObjectStream>`. Defaults to `undefined`.
     * @param {Object} params - The parameters to send to the service.
     * @param {ChatsMessage[]} params.messages - A list of messages comprising the chat conversation
     *   so far. Depending on the model you use, different message types (modalities) are supported,
     *   like `"text"`, `"images"`, and `"audio"`.
     * @param {string} params.model - ID or alias of the model to forward the chat request to.
     * @param {JsonObject} [params.audio] - Parameters for audio output. Only required when audio
     *   output is requested with modalities: `["audio"]`.
     *
     *   See: [OpenAI's Audio Guide](https://platform.openai.com/docs/guides/audio) for more
     *   information.
     * @param {ChatsCache} [params.cache] - Caching configuration for a request. Cache is only
     *   supported for non-streaming requests.
     * @param {number} [params.frequencyPenalty] - A number between `-2.0` and `2.0`. Positive values
     *   penalize new tokens based on their existing frequency in the text so far, decreasing the
     *   model's likelihood to repeat the same line verbatim.
     * @param {ChatsFunctionCall} [params.functionCall] - Controls which (if any) function is called
     *   by the model.
     *
     *   - `"none"` means the model will not call a function and instead generates a message.
     *   - `"auto"` means the model can pick between generating a message or calling a function.
     *   - Specifying a particular function via `{"name": "my_function"}` forces the model to call that
     *       function.
     *
     *   `"none"` is the default when no functions are present. `"auto"` is the default if functions are
     *   present.
     *
     *   Deprecated: `function_call` has been deprecated by OpenAI in favor of `tool_choice`.
     * @param {JsonObject} [params.functions] - A list of functions the model may generate JSON inputs
     *   for.
     *
     *   Deprecated: `functions` has been deprecated by OpenAI in favor of `tools`.
     * @param {JsonObject} [params.logitBias] - Modifies the likelihood of specified tokens appearing
     *   in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the
     *   tokenizer) to an associated bias value from `-100` to `100`. Mathematically, the bias is
     *   added to the logits generated by the model prior to sampling. The exact effect will vary per
     *   model, but values between `-1` and `1` should decrease or increase likelihood of selection;
     *   values like `-100` or `100` should result in a ban or exclusive selection of the relevant
     *   token.
     * @param {boolean} [params.logprobs] - Indicates whether to return log probabilities of the
     *   output tokens or not. If `true`, returns the log probabilities of each output token returned
     *   in the content of message.
     * @param {number} [params.maxCompletionTokens] - Specifies an upper bound for the number of
     *   tokens that can be generated for a completion, including visible output tokens and [reasoning
     *   tokens][reasoning tokens].
     *
     * [reasoning tokens]: https://platform.openai.com/docs/guides/reasoning
     * @param {number} [params.maxTokens] - Specifies a maximum number of tokens that can be generated
     *   in the chat completion. This value can be used to control costs for text generated via API.
     *
     *   Deprecated: `max_tokens` has been deprecated by OpenAI in favor of `max_completion_tokens`, and
     *   is not compatible with `o1` series models.
     * @param {JsonObject} [params.metadata] - Contains developer-defined tags and values used for
     *   filtering completions.
     * @param {string[]} [params.modalities] - Specifies the output types that you would like the
     *   model to generate for this request. Most models are capable of generating text, which is the
     *   default (`["text"]`). Some models can generate audio. For OpenAI, the `gpt-4o-audio-preview`
     *   model can be used to [generate audio][generate audio]. To request that this model generate
     *   both text and audio responses, you can use `["text", "audio"]`.
     *
     * [generate audio]: https://platform.openai.com/docs/guides/audio
     * @param {number} [params.n] - Specifies how many chat completion choices to generate for each
     *   input message.
     *
     *   Note: you will be charged based on the number of generated tokens across all choices, keep
     *   `"n"` set to `1` to minimize costs.
     * @param {boolean} [params.parallelToolCalls] - Specifies whether to enable parallel function
     *   calling during tool use.
     * @param {ChatsPrediction} [params.prediction] - The configuration for a [Predicted
     *   Output][Predicted Output], which can greatly improve response times when large parts of the
     *   model response are known ahead of time. This is most common when you are regenerating a file
     *   with only minor changes to most of the content.
     *
     * [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
     * @param {number} [params.presencePenalty] - A number between `-2.0` and `2.0`. Positive values
     *   penalize new tokens based on whether they appear in the text so far, increasing the model's
     *   likelihood to talk about new topics.
     * @param {string} [params.reasoningEffort] - Constrains effort on reasoning for reasoning models.
     *   For OpenAI, currently supported by `o1` models only. Reducing reasoning effort can result in
     *   faster responses and fewer tokens used on reasoning in a response.
     * @param {ChatsResponseFormat} [params.responseFormat] - An object specifying the format that the
     *   model must output.
     *
     *   - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
     *       Outputs][Structured Outputs] which ensures the model will match your supplied JSON
     *       schema.
     *   - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
     *       generates is valid JSON.
     *
     *   Important: when using JSON mode, you must also instruct the model to produce JSON yourself via
     *   a system or user message. Without this, the model may generate an unending stream of
     *   whitespace until the generation reaches the token limit, resulting in a long-running and
     *   seemingly "stuck" request. Also note that the message content may be partially cut off if
     *   `"finish_reason"` is set to `"length"`, which indicates the generation exceeded `max_tokens`
     *   or the conversation exceeded the max context length.
     *
     * [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
     * @param {JsonObject} [params.router] - Specifies model routing configurations for the request.
     * @param {number} [params.seed] - The seed for the model request. For OpenAI, this feature is in
     *   Beta. If specified, OpenAI's system will make a best effort attempt to sample
     *   deterministically, such that repeated requests with the same seed and parameters should
     *   return the same result. Determinism is not guaranteed, and you should refer to the
     *   `system_fingerprint` response parameter to monitor changes in the backend.
     * @param {string} [params.serviceTier] - The service tier used for processing a request.
     * @param {string[]} [params.stop] - Specifies up to 4 sequences where the API will stop
     *   generating further tokens.
     * @param {boolean} [params.store] - Indicates whether to store the output of this chat completion
     *   request for use in OpenAI's [model distillation][model distillation] or [evals][evals]
     *   products.
     *
     * [model distillation]: https://platform.openai.com/docs/guides/distillation
     *
     * [evals]: https://platform.openai.com/docs/guides/evals
     * @param {boolean} [params.stream] - Indicates whether to stream the model response to the user.
     *   If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as
     *   data-only server-sent events as they become available, with the stream terminated by a
     *   `data: [DONE]` message.
     * @param {StreamOptions} [params.streamOptions] - Options for streaming response. Only set this
     *   when you set `stream` to `true`.
     * @param {number} [params.temperature] - Specifies what sampling temperature to use. Higher
     *   values like `0.8` will make the output more random, while lower values like `0.2` will make
     *   it more focused and deterministic.
     *
     *   Note: OpenAI generally recommends altering this or `top_p` but not both.
     * @param {ChatsToolChoice} [params.toolChoice] - Controls which (if any) tool is called by the
     *   model.
     *
     *   - `"none"` means the model will not call any tool and instead generates a message.
     *   - `"auto"` means the model can pick between generating a message or calling one or more tools.
     *   - `"required"` means the model must call one or more tools.
     *   - Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
     *       forces the model to call that tool.
     *
     *   `"none"` is the default when no tools are present. `"auto"` is the default if tools are
     *   present.
     * @param {ChatsRequestTool[]} [params.tools] - A list of tools the model may call. Currently,
     *   only functions are supported as tools. Use this to provide a list of functions the model may
     *   generate JSON inputs for. A max of 128 functions are supported.
     * @param {number} [params.topLogprobs] - An integer between `0` and `20` specifying the number of
     *   most likely tokens to return at each token position, each with an associated log probability.
     *   `logprobs` must be set to `true` if this parameter is used.
     * @param {number} [params.topP] - An alternative to sampling with `temperature`, called nucleus
     *   sampling, where the model considers the results of the tokens with `top_p` probability mass.
     *   Example: `0.1` means only the tokens comprising the top 10% probability mass are considered.
     *
     *   Note: OpenAI generally recommends altering this or `temperature` but not both.
     * @param {string} [params.user] - A unique identifier representing your end-user, which can help
     *   OpenAI to monitor and detect abuse.
     * @param {boolean} [params.returnObject] - Flag that indicates the type of the streamed items.
     *   Set 'true' to return objects, 'false' to return strings. Default: 'true'. Per the declared
     *   result type it has no effect unless `stream` is `true`.
     * @param {AbortSignal} [params.signal] - Signal from AbortController
     * @param {OutgoingHttpHeaders} [params.headers] - Custom request headers
     * @returns {Promise<Response<ChatsResponse> | Stream<string | ChatObjectStream>>} A Response of
     *   ChatsResponse unless `stream` is `true`; in that case a Stream of ChatObjectStream items, or
     *   a Stream of strings when `returnObject` is `false`.
     * @throws {Error} If validation fails or an error occurs during the request.
     */
    create<S extends boolean | undefined = false, R extends boolean | undefined = undefined>(params: CreateChatCompletionsParams & {
        stream?: S;
        returnObject?: R;
    }): Promise<ChatCompletionResult<S, R>>;
}
/** Class for handling embedding requests. */
export declare class EmbeddingCompletions extends Completions {
    /**
     * Create Embeddings.
     *
     * Generate embeddings based on the provided input using the provided model.
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {EmbeddingsInput} params.input - Input text to embed, encoded as a string, array of
     *   strings, array of integers, or array of integer arrays. The input must not exceed the max
     *   input tokens for the model (8192 tokens for OpenAI's `text-embedding-ada-002`) and cannot be
     *   an empty string. Any array must be 2048 dimensions or less. Some models may also impose a
     *   limit on total number of tokens summed across inputs.
     * @param {string} params.model - ID of the model to use.
     * @param {number} [params.dimensions] - Number of dimensions the resulting output embeddings
     *   should have. For OpenAI, only supported in `text-embedding-3` and later models.
     * @param {string} [params.encodingFormat] - Format to return the embeddings in. Can be either
     *   `"float"` or `"base64"`.
     * @param {string} [params.user] - A unique identifier representing your end-user.
     * @param {AbortSignal} [params.signal] - Signal from AbortController
     * @param {OutgoingHttpHeaders} [params.headers] - Custom request headers
     * @returns {Promise<Response<EmbeddingResponse>>} - Embeddings response for the provided input
     * @throws {Error} If validation fails or an error occurs during the request.
     */
    create(params: CreateEmbeddingsParams): Promise<Response<EmbeddingResponse>>;
}
/** Class for handling basic text completion requests. */
export declare class GenerateTextCompletions extends Completions {
    /**
     * Create Text Completions.
     *
     * Generate a text completion based on the provided prompt and parameters using the provided
     * model.
     *
     * @template S - Type of `params.stream`. Per `CompletionResult`, the result is a `Stream` when it
     *   is `true` and a `Response` otherwise. Defaults to `false`.
     * @template R - Type of `params.returnObject`. Per `CompletionResult`, it only matters when `S`
     *   is `true`: `false` selects `Stream<string>`, anything else selects
     *   `Stream<TextCompletionStream>`. Defaults to `undefined`.
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.model - Model is the ID of the model to use.
     * @param {string} params.prompt - Prompt(s) to generate completions for, encoded as a string,
     *   array of strings, array of tokens, or array of token arrays.
     *
     *   Note: `<|endoftext|>` is the document separator that the model sees during training, so if a
     *   prompt is not specified the model will generate as if from the beginning of a new document.
     * @param {number} [params.bestOf] - Generates `best_of` number of completions server-side and
     *   returns the "best" (the one with the highest log probability per token). Results cannot be
     *   streamed. When used with `n`, `best_of` controls the number of candidate completions and `n`
     *   specifies how many to return – `best_of` must be greater than `n`.
     *
     *   Note: Because this parameter generates many completions, it can quickly consume your token
     *   quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and
     *   `stop`.
     * @param {CompletionsCache} [params.cache] - Caching configuration for the request. Cache is only
     *   supported for non-streaming requests.
     * @param {boolean} [params.echo] - Echo back the prompt in addition to the completion.
     * @param {number} [params.frequencyPenalty] - A number between `-2.0` and `2.0`. Positive values
     *   penalize new tokens based on their existing frequency in the text so far, decreasing the
     *   model's likelihood to repeat the same line verbatim.
     * @param {JsonObject} [params.logitBias] - Used to modify the likelihood of specified tokens
     *   appearing in the completion. Accepts a JSON object that maps tokens (specified by their token
     *   ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this
     *   tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits
     *   generated by the model prior to sampling.
     *
     *   The exact effect will vary per model, but:
     *
     *   - Values between `-1` and `1` should decrease or increase likelihood of selection and
     *   - Values like `-100` or `100` should result in a ban or exclusive selection of the relevant
     *       token.
     *
     *   As an example, you can pass `{"50256": -100}` to prevent the `<|endoftext|>` token from being
     *   generated.
     * @param {number} [params.logprobs] - The number of most likely output tokens to include the log
     *   probabilities of, as well as the chosen tokens. For example, if `logprobs` is `5`, the API
     *   will return a list of the 5 most likely tokens. The API will always return the `logprob` of
     *   the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum
     *   value for `logprobs` is `5`.
     * @param {number} [params.maxTokens] - The maximum number of tokens that can be generated in the
     *   completion. The token count of your prompt plus `max_tokens` cannot exceed the model's
     *   context length.
     * @param {JsonObject} [params.metadata] - Contains developer-defined tags and values used for
     *   filtering completions.
     * @param {number} [params.n] - Specifies how many completions to generate for each prompt.
     *
     *   Note: Because this parameter generates many completions, it can quickly consume your token
     *   quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and
     *   `stop`.
     * @param {number} [params.presencePenalty] - A number between `-2.0` and `2.0`. Positive values
     *   penalize new tokens based on whether they appear in the text so far, increasing the model's
     *   likelihood to talk about new topics.
     * @param {ModelRouter} [params.router] - Router is the model routing configuration for the
     *   request.
     * @param {number} [params.seed] - The seed for the model request. If specified, OpenAI's system
     *   will make a best effort to sample deterministically, such that repeated requests with the
     *   same `seed` and parameters should return the same result.
     *
     *   Determinism is not guaranteed, and you should refer to the `system_fingerprint` response
     *   parameter to monitor changes in the backend.
     * @param {string[]} [params.stop] - Specifies up to 4 sequences where the API will stop
     *   generating further tokens.
     * @param {boolean} [params.stream] - Indicates whether to stream back partial progress. If set,
     *   tokens will be sent as data-only [server-sent events][server-sent events] as they become
     *   available, with the stream terminated by a `data: [DONE]` message.
     *
     * [server-sent events]: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format
     * @param {StreamOptions} [params.streamOptions] - Options for streaming response. Only set this
     *   when you set `stream` to `true`.
     * @param {string} [params.suffix] - Text that comes after a completion of inserted text. On
     *   OpenAI, this parameter is only supported for `gpt-3.5-turbo-instruct`.
     * @param {number} [params.temperature] - Specifies what sampling temperature to use, between
     *   `0` and `2`. Higher values like `0.8` will make the output more random, while lower values
     *   like `0.2` will make it more focused and deterministic.
     *
     *   Note: OpenAI generally recommends altering this or `top_p` but not both.
     * @param {number} [params.topP] - An alternative to sampling with `temperature`, called nucleus
     *   sampling, where the model considers the results of the tokens with `top_p` probability mass.
     *   So `0.1` means only the tokens comprising the top 10% probability mass are considered.
     *
     *   Note: OpenAI generally recommends altering this or `temperature` but not both.
     * @param {string} [params.user] - A unique identifier representing your end-user, which can help
     *   Services to monitor and detect abuse.
     * @param {boolean} [params.returnObject] - Flag that indicates the type of the streamed items.
     *   Set 'true' to return objects, 'false' to return strings. Default: 'true'. Per the declared
     *   result type it has no effect unless `stream` is `true`.
     * @param {AbortSignal} [params.signal] - Signal from AbortController
     * @param {OutgoingHttpHeaders} [params.headers] - Custom request headers
     * @returns {Promise<Response<CompletionsResponse> | Stream<string | TextCompletionStream>>} A
     *   Response of CompletionsResponse unless `stream` is `true`; in that case a Stream of
     *   TextCompletionStream items, or a Stream of strings when `returnObject` is `false`.
     * @throws {Error} If validation fails or an error occurs during the request.
     */
    create<S extends boolean | undefined = false, R extends boolean | undefined = undefined>(params: CreateCompletionsParams & {
        stream?: S;
        returnObject?: R;
    }): Promise<CompletionResult<S, R>>;
}
//# sourceMappingURL=completions.d.mts.map