// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

import { APIResource } from '../resource';
import * as Core from '../core';
import { Stream } from '../streaming';

export class Completions extends APIResource {
  /**
   * Completions
   *
   * @example
   * ```ts
   * const completion = await client.completions.create({
   *   model: 'model',
   *   prompt: 'string',
   * });
   * ```
   */
  create(
    params: CompletionCreateParamsNonStreaming,
    options?: Core.RequestOptions,
  ): Core.APIPromise<Completion>;
  create(
    params: CompletionCreateParamsStreaming,
    options?: Core.RequestOptions,
  ): Core.APIPromise<Stream<Completion>>;
  create(
    params: CompletionCreateParamsBase,
    options?: Core.RequestOptions,
  ): Core.APIPromise<Stream<Completion> | Completion>;
  create(
    params: CompletionCreateParams,
    options?: Core.RequestOptions,
  ): Core.APIPromise<Completion> | Core.APIPromise<Stream<Completion>> {
    const { 'CF-RAY': cfRay, 'X-Amz-Cf-Id': xAmzCfId, 'X-delay-time': xDelayTime, ...body } = params;
    return this._client.post('/v1/completions', {
      body,
      ...options,
      stream: body.stream ?? false,
      headers: {
        ...(cfRay != null ? { 'CF-RAY': cfRay } : undefined),
        ...(xAmzCfId != null ? { 'X-Amz-Cf-Id': xAmzCfId } : undefined),
        ...(xDelayTime != null ? { 'X-delay-time': xDelayTime.toString() } : undefined),
        ...options?.headers,
      },
    }) as Core.APIPromise<Completion> | Core.APIPromise<Stream<Completion>>;
  }
}
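/*
 * Usage sketch (illustrative, not part of the generated surface): the overloads on
 * `create` infer the return type from the `stream` flag. `client` is assumed to be a
 * configured SDK client instance and 'model' a placeholder model name.
 *
 *   const completion = await client.completions.create({ model: 'model', prompt: 'Hello' });
 *   // completion: Completion (single response)
 *
 *   const stream = await client.completions.create({ model: 'model', prompt: 'Hello', stream: true });
 *   // stream: Stream<Completion> (async iterable of chunks)
 */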
export type Completion =
  | Completion.CompletionResponse
  | Completion.CompletionChunkResponse
  | Completion.ErrorChunkResponse;

export namespace Completion {
  export interface CompletionResponse {
    id: string;
    choices: Array<CompletionResponse.Choice>;
    created: number;
    model: string;
    object: 'text_completion';
    system_fingerprint: string;
    time_info?: CompletionResponse.TimeInfo | null;
    usage?: CompletionResponse.Usage | null;
    [k: string]: unknown;
  }

  export namespace CompletionResponse {
    export interface Choice {
      index: number;
      finish_reason?: 'stop' | 'length' | 'content_filter' | null;
      logprobs?: Choice.Logprobs | null;
      reasoning_logprobs?: Choice.ReasoningLogprobs | null;
      text?: string | null;
      tokens?: Array<number> | null;
      [k: string]: unknown;
    }

    export namespace Choice {
      export interface Logprobs {
        text_offset?: Array<number> | null;
        token_logprobs?: Array<number> | null;
        tokens?: Array<string> | null;
        top_logprobs?: Array<{ [key: string]: number }> | null;
        [k: string]: unknown;
      }

      export interface ReasoningLogprobs {
        content: Array<ReasoningLogprobs.Content> | null;
        refusal: Array<ReasoningLogprobs.Refusal> | null;
        [k: string]: unknown;
      }

      export namespace ReasoningLogprobs {
        export interface Content {
          token: string;
          logprob: number;
          top_logprobs: Array<Content.TopLogprob>;
          bytes?: Array<number> | null;
          [k: string]: unknown;
        }

        export namespace Content {
          export interface TopLogprob {
            token: string;
            logprob: number;
            bytes?: Array<number> | null;
            [k: string]: unknown;
          }
        }

        export interface Refusal {
          token: string;
          logprob: number;
          top_logprobs: Array<Refusal.TopLogprob>;
          bytes?: Array<number> | null;
          [k: string]: unknown;
        }

        export namespace Refusal {
          export interface TopLogprob {
            token: string;
            logprob: number;
            bytes?: Array<number> | null;
            [k: string]: unknown;
          }
        }
      }
    }

    export interface TimeInfo {
      completion_time?: number;
      prompt_time?: number;
      queue_time?: number;
      total_time?: number;
      [k: string]: unknown;
    }

    export interface Usage {
      completion_tokens?: number;
      completion_tokens_details?: Usage.CompletionTokensDetails | null;
      prompt_tokens?: number;
      prompt_tokens_details?: Usage.PromptTokensDetails | null;
      total_tokens?: number;
      [k: string]: unknown;
    }

    export namespace Usage {
      export interface CompletionTokensDetails {
        accepted_prediction_tokens?: number | null;
        rejected_prediction_tokens?: number | null;
        [k: string]: unknown;
      }

      export interface PromptTokensDetails {
        cached_tokens?: number;
        [k: string]: unknown;
      }
    }
  }

  export interface CompletionChunkResponse {
    id: string;
    created: number;
    model: string;
    object: 'chat.completion.chunk' | 'text_completion';
    system_fingerprint: string;
    choices?: Array<CompletionChunkResponse.Choice> | null;
    service_tier?: string | null;
    time_info?: CompletionChunkResponse.TimeInfo | null;
    usage?: CompletionChunkResponse.Usage | null;
    [k: string]: unknown;
  }

  export namespace CompletionChunkResponse {
    export interface Choice {
      index: number;
      delta?: Choice.Delta | null;
      finish_reason?: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
      logprobs?: Choice.Logprobs | null;
      reasoning_logprobs?: Choice.ReasoningLogprobs | null;
      text?: string | null;
      tokens?: Array<number> | null;
      [k: string]: unknown;
    }

    export namespace Choice {
      export interface Delta {
        content?: string | null;
        reasoning?: string | null;
        role?: 'assistant' | 'user' | 'system' | 'tool' | null;
        tokens?: Array<number> | null;
        tool_calls?: Array<Delta.ToolCall> | null;
        [k: string]: unknown;
      }

      export namespace Delta {
        /**
         * Streaming only. Represents a function call in an assistant tool call.
         */
        export interface ToolCall {
          /**
           * Streaming only. Represents a function in an assistant tool call.
           */
          function: ToolCall.Function;
          type: 'function';
          id?: string | null;
          index?: number | null;
          [k: string]: unknown;
        }

        export namespace ToolCall {
          /**
           * Streaming only. Represents a function in an assistant tool call.
           */
          export interface Function {
            arguments?: string | null;
            name?: string | null;
            [k: string]: unknown;
          }
        }
      }

      export interface Logprobs {
        text_offset?: Array<number> | null;
        token_logprobs?: Array<number> | null;
        tokens?: Array<string> | null;
        top_logprobs?: Array<{ [key: string]: number }> | null;
        [k: string]: unknown;
      }

      export interface ReasoningLogprobs {
        content: Array<ReasoningLogprobs.Content> | null;
        refusal: Array<ReasoningLogprobs.Refusal> | null;
        [k: string]: unknown;
      }

      export namespace ReasoningLogprobs {
        export interface Content {
          token: string;
          logprob: number;
          top_logprobs: Array<Content.TopLogprob>;
          bytes?: Array<number> | null;
          [k: string]: unknown;
        }

        export namespace Content {
          export interface TopLogprob {
            token: string;
            logprob: number;
            bytes?: Array<number> | null;
            [k: string]: unknown;
          }
        }

        export interface Refusal {
          token: string;
          logprob: number;
          top_logprobs: Array<Refusal.TopLogprob>;
          bytes?: Array<number> | null;
          [k: string]: unknown;
        }

        export namespace Refusal {
          export interface TopLogprob {
            token: string;
            logprob: number;
            bytes?: Array<number> | null;
            [k: string]: unknown;
          }
        }
      }
    }

    export interface TimeInfo {
      completion_time?: number;
      prompt_time?: number;
      queue_time?: number;
      total_time?: number;
      [k: string]: unknown;
    }

    export interface Usage {
      completion_tokens?: number;
      completion_tokens_details?: Usage.CompletionTokensDetails | null;
      prompt_tokens?: number;
      prompt_tokens_details?: Usage.PromptTokensDetails | null;
      total_tokens?: number;
      [k: string]: unknown;
    }

    export namespace Usage {
      export interface CompletionTokensDetails {
        accepted_prediction_tokens?: number | null;
        rejected_prediction_tokens?: number | null;
        [k: string]: unknown;
      }

      export interface PromptTokensDetails {
        cached_tokens?: number;
        [k: string]: unknown;
      }
    }
  }

  export interface ErrorChunkResponse {
    error: ErrorChunkResponse.Error;
    status_code: number;
    [k: string]: unknown;
  }

  export namespace ErrorChunkResponse {
    export interface Error {
      id?: string | null;
      code?: string | null;
      message?: string | null;
      param?: string | null;
      type?: string | null;
      [k: string]: unknown;
    }
  }
}
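/*
 * Narrowing sketch (illustrative): `Completion` is a union of a full response, a
 * streaming chunk, and an error chunk, so stream consumers typically discriminate at
 * runtime. `stream` below is assumed to be a Stream<Completion> returned by `create`.
 *
 *   for await (const chunk of stream) {
 *     if ('error' in chunk) {
 *       // Completion.ErrorChunkResponse
 *       console.error(chunk.status_code, chunk.error.message);
 *     } else {
 *       // Completion.CompletionResponse or Completion.CompletionChunkResponse
 *       for (const choice of chunk.choices ?? []) process.stdout.write(choice.text ?? '');
 *     }
 *   }
 */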
// This enables us to do matching against the parameter to overload the function and know what the
// return type will be (whether with or without streaming).
export type CompletionCreateParams = CompletionCreateParamsNonStreaming | CompletionCreateParamsStreaming;

export interface CompletionCreateParamsNonStreaming extends CompletionCreateParamsBase {
  stream?: false | null;
}

export interface CompletionCreateParamsStreaming extends CompletionCreateParamsBase {
  stream: true;
}
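/*
 * Discriminant sketch (illustrative): the `stream` property is what distinguishes the
 * two param shapes. A literal `stream: true` selects CompletionCreateParamsStreaming
 * and hence the Stream<Completion> overload; omitting `stream` (or passing `false`)
 * selects the non-streaming overload.
 *
 *   const nonStreaming: CompletionCreateParamsNonStreaming = { model: 'model', prompt: 'hi' };
 *   const streaming: CompletionCreateParamsStreaming = { model: 'model', prompt: 'hi', stream: true };
 */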
export interface CompletionCreateParamsBase {
  /**
   * Body param:
   */
  model: string;

  /**
   * Body param: The prompt(s) to generate completions for, encoded as a string,
   * array of strings, array of tokens, or array of token arrays.
   */
  prompt: string | Array<string> | Array<number> | Array<Array<number>>;

  /**
   * Body param: Generates `best_of` completions server-side and returns the "best"
   * (the one with the highest log probability per token). Results cannot be
   * streamed. When used with `n`, `best_of` controls the number of candidate
   * completions and `n` specifies how many to return – `best_of` must be greater
   * than `n`. **Note:** Because this parameter generates many completions, it can
   * quickly consume your token quota. Use carefully and ensure that you have
   * reasonable settings for `max_tokens` and `stop`.
   */
  best_of?: number | null;

  /**
   * Body param: Echo back the prompt in addition to the completion.
   */
  echo?: boolean | null;

  /**
   * Body param: Number between -2.0 and 2.0. Positive values penalize new tokens
   * based on their existing frequency in the text so far, decreasing the model's
   * likelihood to repeat the same line verbatim.
   */
  frequency_penalty?: number | null;

  /**
   * Body param: The grammar root used for structured output generation.
   */
  grammar_root?: string | null;

  /**
   * Body param: Modify the likelihood of specified tokens appearing in the
   * completion.
   *
   * Accepts a JSON object that maps tokens (specified by their token ID in the
   * tokenizer) to an associated bias value from -100 to 100. Mathematically, the
   * bias is added to the logits generated by the model prior to sampling. The exact
   * effect will vary per model, but values between -1 and 1 should decrease or
   * increase likelihood of selection; values like -100 or 100 should result in a ban
   * or exclusive selection of the relevant token.
   */
  logit_bias?: { [key: string]: number } | null;

  /**
   * Body param: Include the log probabilities of the `logprobs` most likely output
   * tokens, as well as the chosen tokens. For example, if `logprobs` is 5, the API
   * will return a list of the 5 most likely tokens. The API will always return the
   * logprob of the sampled token, so there may be up to `logprobs + 1` elements in
   * the response.
   */
  logprobs?: number | null;

  /**
   * Body param: The maximum number of tokens that can be generated in the
   * completion. The total length of input tokens and generated tokens is limited by
   * the model's context length.
   */
  max_tokens?: number | null;

  /**
   * Body param: The minimum number of tokens to generate for a completion. If not
   * specified or set to 0, the model will generate as many tokens as it deems
   * necessary. Setting it to -1 allows generation up to the maximum sequence
   * length.
   */
  min_tokens?: number | null;

  /**
   * Body param: How many completion choices to generate for each prompt. Note that
   * you will be charged based on the number of generated tokens across all of the
   * choices. Keep `n` as 1 to minimize costs.
   */
  n?: number | null;

  /**
   * Body param: Number between -2.0 and 2.0. Positive values penalize new tokens
   * based on whether they appear in the text so far, increasing the model's
   * likelihood to talk about new topics.
   */
  presence_penalty?: number | null;

  /**
   * Body param: Determines how reasoning is returned in the response. If set to
   * `parsed`, the reasoning will be returned in the `reasoning` field of the
   * response message as a string. If set to `raw`, the reasoning will be returned
   * in the `content` field of the response message with special tokens. If set to
   * `hidden`, the reasoning will not be returned in the response.
   */
  reasoning_format?: 'none' | 'parsed' | 'text_parsed' | 'raw' | 'hidden';

  /**
   * Body param: Return raw tokens instead of text.
   */
  return_raw_tokens?: boolean | null;

  /**
   * Body param: If specified, our system will make a best effort to sample
   * deterministically, such that repeated requests with the same `seed` and
   * parameters should return the same result. Determinism is not guaranteed.
   */
  seed?: number | null;

  /**
   * Body param: Up to 4 sequences where the API will stop generating further
   * tokens. The returned text will not contain the stop sequence.
   */
  stop?: string | Array<string> | null;

  /**
   * Body param:
   */
  stream?: boolean | null;

  /**
   * Body param: Options for streaming.
   */
  stream_options?: CompletionCreateParams.StreamOptions | null;

  /**
   * Body param: The suffix that comes after a completion of inserted text. (OpenAI
   * feature, not supported.)
   */
  suffix?: string | null;

  /**
   * Body param: What sampling temperature to use, between 0 and 1.5. Higher values
   * like 0.8 will make the output more random, while lower values like 0.2 will
   * make it more focused and deterministic. We generally recommend altering this or
   * `top_p` but not both.
   */
  temperature?: number | null;

  /**
   * Body param: An alternative to sampling with temperature, called nucleus
   * sampling, where the model considers the results of the tokens with `top_p`
   * probability mass. So 0.1 means only the tokens comprising the top 10%
   * probability mass are considered. We generally recommend altering this or
   * `temperature` but not both.
   */
  top_p?: number | null;

  /**
   * Body param: A unique identifier representing your end-user, which can help
   * Cerebras to monitor and detect abuse.
   */
  user?: string | null;

  /**
   * Header param:
   */
  'CF-RAY'?: string;

  /**
   * Header param:
   */
  'X-Amz-Cf-Id'?: string;

  /**
   * Header param:
   */
  'X-delay-time'?: number;
}

export namespace CompletionCreateParams {
  /**
   * Options for streaming.
   */
  export interface StreamOptions {
    include_usage?: boolean | null;
    [k: string]: unknown;
  }
}

export declare namespace Completions {
  export { type Completion as Completion, type CompletionCreateParams as CompletionCreateParams };
}
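/*
 * Header-param sketch (illustrative): 'CF-RAY', 'X-Amz-Cf-Id', and 'X-delay-time' are
 * destructured out of the params inside `create` and sent as request headers rather
 * than in the JSON body. Values below are placeholders.
 *
 *   await client.completions.create({
 *     model: 'model',
 *     prompt: 'string',
 *     max_tokens: 64,
 *     'X-delay-time': 5, // becomes the X-delay-time request header, not a body field
 *   });
 */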