/**
 * This file was auto-generated by Fern from our API Definition.
 */

import * as OctoAI from "../../../../index";

/**
 * Request body for creating a streamed text completion.
 *
 * Only `model` is required; every other field is optional.
 *
 * @example
 *     {
 *         bestOf: 1,
 *         echo: true,
 *         frequencyPenalty: 1.1,
 *         ignoreEos: true,
 *         logPrompt: true,
 *         logitBias: {
 *             "string": {
 *                 "key": "value"
 *             }
 *         },
 *         loglikelihood: true,
 *         logprobs: 1,
 *         maxTokens: 1,
 *         minP: 1.1,
 *         model: "string",
 *         n: 1,
 *         peft: "string",
 *         presencePenalty: 1.1,
 *         prompt: "string",
 *         repetitionPenalty: 1.1,
 *         seed: 1,
 *         stop: "string",
 *         streamOptions: {
 *             includeUsage: true
 *         },
 *         suffix: "string",
 *         temperature: 1.1,
 *         topP: 1.1,
 *         user: "string"
 *     }
 */
export interface CreateCompletionStreamRequest {
    /** Number of sequences that are generated from the prompt. `best_of` must be greater than or equal to `n`. */
    bestOf?: number;
    /** Echo back the prompt in addition to the completion. */
    echo?: boolean;
    /** Penalizes new tokens based on their frequency in the generated text so far. */
    frequencyPenalty?: number;
    /** Whether to ignore the EOS token and continue generating tokens after the EOS token is generated. */
    ignoreEos?: boolean;
    /** OctoAI stores request prompt if True. */
    logPrompt?: boolean;
    /**
     * Modify the likelihood of specified tokens appearing in the completion.
     *
     * Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
     *
     * As an example, you can pass {'50256': -100} to prevent the <|endoftext|> token from being generated.
     *
     * NOTE(review): the value type below follows the shape shown in the interface-level example
     * (string keys mapping to nested objects), while the prose above describes numeric bias
     * values. Confirm against the API definition.
     */
    logitBias?: Record<string, Record<string, unknown>>;
    /** Return log probabilities for all prompt tokens excluding the first one from prefill step if True. */
    loglikelihood?: boolean;
    /** Number of log probabilities to return per output token. */
    logprobs?: number;
    /** Maximum number of tokens to generate per output sequence. */
    maxTokens?: number;
    /** Sets a dynamic threshold of the top tokens to consider based on the probability value of the most likely next token. Following https://arxiv.org/abs/2407.01082. */
    minP?: number;
    /** The identifier of the model to use. Can be a shared tenancy or custom model identifier. */
    model: string;
    /** Number of output sequences to return. */
    n?: number;
    /** Parameter-efficient fine-tuning ID. */
    peft?: string;
    /** Penalizes new tokens based on whether they appear in the generated text so far. */
    presencePenalty?: number;
    /** The prompt to generate completions from. */
    prompt?: OctoAI.textGen.Prompt;
    /** Controls the likelihood of the model generating repeated texts. */
    repetitionPenalty?: number;
    /** If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. */
    seed?: number;
    /** Generation stop condition. */
    stop?: OctoAI.textGen.Stop;
    /** If set, usageStats will be streamed on the last content-containing chunk. */
    streamOptions?: OctoAI.textGen.StreamOptions;
    /** The suffix that comes after a completion of inserted text. */
    suffix?: string;
    /** Controls the randomness of the sampling. */
    temperature?: number;
    /** Controls the cumulative probability of the top tokens to consider. */
    topP?: number;
    /** A unique identifier representing your end-user. */
    user?: string;
}