/**
 * This file was auto-generated by Fern from our API Definition.
 */

import * as OctoAI from "../../../../index";

/**
 * Request body for creating a streamed chat completion.
 *
 * Only `messages` and `model` are required; every other field is optional.
 *
 * @example
 *     {
 *         frequencyPenalty: 1.1,
 *         ignoreEos: true,
 *         logPrompt: true,
 *         logitBias: {
 *             "string": {
 *                 "key": "value"
 *             }
 *         },
 *         loglikelihood: true,
 *         logprobs: true,
 *         maxTokens: 1,
 *         messages: [{
 *                 content: "string",
 *                 role: "string",
 *                 toolCalls: [{
 *                         function: {
 *                             arguments: "string",
 *                             name: "string"
 *                         },
 *                         id: {
 *                             "key": "value"
 *                         },
 *                         type: {
 *                             "key": "value"
 *                         }
 *                     }]
 *             }],
 *         minP: 1.1,
 *         model: "string",
 *         n: 1,
 *         peft: "string",
 *         presencePenalty: 1.1,
 *         repetitionPenalty: 1.1,
 *         responseFormat: {
 *             schema: {
 *                 "string": {
 *                     "key": "value"
 *                 }
 *             },
 *             type: "string"
 *         },
 *         stop: "string",
 *         streamOptions: {
 *             includeUsage: true
 *         },
 *         temperature: 1.1,
 *         toolChoice: "auto",
 *         tools: [{
 *                 function: {
 *                     description: "string",
 *                     name: "string",
 *                     parameters: {
 *                         "string": {
 *                             "key": "value"
 *                         }
 *                     }
 *                 },
 *                 type: "function"
 *             }],
 *         topLogprobs: 1,
 *         topP: 1.1,
 *         user: "string"
 *     }
 */
export interface CreateChatCompletionStreamRequest {
    /** Penalizes new tokens based on their frequency in the generated text so far. */
    frequencyPenalty?: number;
    /** Whether to ignore the EOS token and continue generating tokens after the EOS token is generated. */
    ignoreEos?: boolean;
    /** OctoAI stores the request prompt if true. */
    logPrompt?: boolean;
    /**
     * Modify the likelihood of specified tokens appearing in the completion.
     *
     * Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an
     * associated bias value from -100 to 100. Mathematically, the bias is added to the logits
     * generated by the model prior to sampling. The exact effect will vary per model, but values
     * between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100
     * should result in a ban or exclusive selection of the relevant token.
     *
     * As an example, you can pass `{'50256': -100}` to prevent the `<|endoftext|>` token from being
     * generated.
     *
     * NOTE(review): values are typed as `unknown` to stay compatible with the example above; the
     * description implies they are expected to be numbers — confirm against the API definition.
     */
    logitBias?: Record<string, unknown>;
    /** Return log probabilities for all prompt tokens, excluding the first one from the prefill step, if true. */
    loglikelihood?: boolean;
    /**
     * Whether to return log probabilities of the output tokens or not. If true, returns the log
     * probabilities of each output token returned in the content of message.
     */
    logprobs?: boolean;
    /** Maximum number of tokens to generate per output sequence. */
    maxTokens?: number;
    /** A list of messages comprising the conversation so far. */
    messages: OctoAI.textGen.ChatMessage[];
    /**
     * Sets a dynamic threshold of the top tokens to consider based on the probability value of the
     * most likely next token. Following https://arxiv.org/abs/2407.01082.
     */
    minP?: number;
    /** The identifier of the model to use. Can be a shared tenancy or custom model identifier. */
    model: string;
    /** Number of output sequences to return. */
    n?: number;
    /** Parameter-efficient fine-tuning ID. */
    peft?: string;
    /** Penalizes new tokens based on whether they appear in the generated text so far. */
    presencePenalty?: number;
    /** Controls the likelihood of the model generating repeated texts. */
    repetitionPenalty?: number;
    /**
     * Allows specification of a response format and associated schema that will constrain the LLM
     * output to that structure. For example, using the `json_object` type allows you to provide a
     * desired JSON schema for the output to follow.
     */
    responseFormat?: OctoAI.textGen.ChatCompletionResponseFormat;
    /** Generation stop condition. */
    stop?: OctoAI.textGen.Stop;
    /** If set, usageStats will be streamed on the last content-containing chunk. */
    streamOptions?: OctoAI.textGen.StreamOptions;
    /** Controls the randomness of the sampling. */
    temperature?: number;
    /**
     * Controls which (if any) tool is called by the model.
     *
     * - `none` means the model will not call any tool and instead generates a message.
     * - `auto` means the model can pick between generating a message or calling one or more tools.
     * - `required` means the model must call one or more tools.
     *
     * Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
     * forces the model to call that tool.
     *
     * `none` is the default when no tools are present. `auto` is the default if tools are present.
     */
    toolChoice?: OctoAI.textGen.CreateChatCompletionStreamRequestToolChoice;
    /**
     * A list of tools the model may call. Currently, only functions are supported as a tool. Use
     * this to provide a list of functions the model may generate JSON inputs for. A max of 128
     * functions are supported.
     */
    tools?: OctoAI.textGen.ToolDefinition[];
    /**
     * An integer between 0 and 20 specifying the number of most likely tokens to return at each
     * token position, each with an associated log probability. `logprobs` must be set to `true` if
     * this parameter is used.
     */
    topLogprobs?: number;
    /** Controls the cumulative probability of the top tokens to consider. */
    topP?: number;
    /** A unique identifier representing your end-user. */
    user?: string;
}