/**
 * @module teams-ai
 */
/**
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
 */
import { TurnContext } from 'botbuilder';
import { Memory } from '../MemoryFork';
import { PromptCompletionModel, PromptCompletionModelResponseReceivedEvent } from '../models';
import { PromptFunctions, PromptTemplate } from '../prompts';
import { Tokenizer } from '../tokenizers';
import { PromptResponse } from '../types';
import { PromptResponseValidator } from '../validators';
/**
 * Options for an LLMClient instance.
 * @template TContent Optional. Type of message content returned for a 'success' response. The `response.message.content` field will be of type TContent. Defaults to `any`.
 */
export interface LLMClientOptions<TContent = any> {
    /**
     * AI model to use for completing prompts.
     */
    model: PromptCompletionModel;
    /**
     * Prompt to use for the conversation.
     */
    template: PromptTemplate;
    /**
     * Optional. Memory variable used for storing conversation history.
     * @remarks
     * The history will be stored as a `Message[]` and the variable defaults to `conversation.history`.
     */
    history_variable?: string;
    /**
     * Optional. Memory variable used for storing the user's input message.
     * @remarks
     * The user's input is expected to be a `string`, but it's optional and the variable defaults to `temp.input`.
     */
    input_variable?: string;
    /**
     * Optional. Maximum number of conversation history messages to maintain.
     * @remarks
     * The number of tokens worth of history included in the prompt is controlled by the
     * `ConversationHistory` section of the prompt. This setting controls the automatic pruning of
     * the conversation history performed by the LLMClient instance. It helps keep your memory from
     * growing too large and defaults to a value of `10` messages (or 5 turns).
     */
    max_history_messages?: number;
    /**
     * Optional. Maximum number of automatic repair attempts the LLMClient instance will make.
     * @remarks
     * This defaults to a value of `3` and can be set to `0` if you wish to disable repairing of bad responses.
     */
    max_repair_attempts?: number;
    /**
     * Optional. Tokenizer to use when rendering the prompt or counting tokens.
     * @remarks
     * If not specified, a new instance of `GPTTokenizer` will be created. A `GPT3Tokenizer` can be passed in for gpt-3 models.
     */
    tokenizer?: Tokenizer;
    /**
     * Optional. Response validator to use when completing prompts.
     * @remarks
     * If not specified, a new instance of `DefaultResponseValidator` will be created. The
     * `DefaultResponseValidator` returns a `Validation` that says all responses are valid.
     */
    validator?: PromptResponseValidator<TContent>;
    /**
     * Optional. If true, any repair attempts will be logged to the console.
     */
    logRepairs?: boolean;
    /**
     * Optional message to send to the client at the start of a streaming response.
     */
    startStreamingMessage?: string;
    /**
     * Optional handler to run when a stream is about to conclude.
     */
    endStreamHandler?: PromptCompletionModelResponseReceivedEvent;
    /**
     * Optional. If true, the feedback loop will be enabled for streaming responses.
     */
    enableFeedbackLoop?: boolean;
    /**
     * Optional. The type of feedback loop to use.
     */
    feedbackLoopType?: 'default' | 'custom';
}
/**
 * The configuration of the LLMClient instance.
 */
export interface ConfiguredLLMClientOptions<TContent = any> {
    /**
     * AI model used for completing prompts.
     */
    model: PromptCompletionModel;
    /**
     * Memory variable used for storing conversation history.
     */
    history_variable: string;
    /**
     * Memory variable used for storing the user's input message.
     */
    input_variable: string;
    /**
     * Maximum number of conversation history messages that will be persisted to memory.
     */
    max_history_messages: number;
    /**
     * Maximum number of automatic repair attempts that will be made.
     */
    max_repair_attempts: number;
    /**
     * Prompt used for the conversation.
     */
    template: PromptTemplate;
    /**
     * Tokenizer used when rendering the prompt or counting tokens.
     */
    tokenizer: Tokenizer;
    /**
     * Response validator used when completing prompts.
     */
    validator: PromptResponseValidator<TContent>;
    /**
     * If true, any repair attempts will be logged to the console.
     */
    logRepairs: boolean;
}
/**
 * LLMClient class that's used to complete prompts.
 * @remarks
 * Each LLMClient instance, at a minimum, needs to be configured with a `model` and `template`.
 *
 * Configuring the client to use a `validator` is optional but recommended. The primary benefit of
 * using LLMClient is its response validation and automatic response repair features. The
 * validator acts as a guard and guarantees that you never get a malformed response back from the
 * model; at worst, the response will be flagged as an `invalid_response`.
 *
 * Using the `JSONResponseValidator`, for example, guarantees that you only ever get a valid
 * object back from `completePrompt()`. In fact, you'll get back a fully parsed object and any
 * additional response text from the model will be dropped. If you give the `JSONResponseValidator`
 * a JSON Schema, you will get back a strongly typed and validated instance of an object in
 * the returned `response.message.content`.
 *
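 * For example (a sketch; the `Quote` type and schema object are hypothetical, and this assumes the
 * `JSONResponseValidator` constructor accepts an optional JSON Schema):
 * ```TypeScript
 * interface Quote { author: string; text: string; }
 * const validator = new JSONResponseValidator<Quote>({
 *     type: 'object',
 *     properties: {
 *         author: { type: 'string' },
 *         text: { type: 'string' }
 *     },
 *     required: ['author', 'text']
 * });
 * ```
 *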
 * When a validator detects a bad response from the model, it gives the model "feedback" about the
 * problem it detected with its response and, more importantly, an instruction that tells the model
 * how to repair the problem. This puts the client into a special repair mode where it first
 * forks the memory for the conversation and then has a side conversation with the model in an
 * effort to get it to repair its response. Forking the conversation isolates the bad
 * response and prevents it from contaminating the main conversation history. If the response can
 * be repaired, the client will un-fork the memory and use the repaired response in place of the
 * original bad response. To the model it's as if it never made a mistake, which is important for
 * future turns with the model. If the response can't be repaired, a response status of
 * `invalid_response` will be returned.
 *
 * When using a well-designed validator, like the `JSONResponseValidator`, the client can typically
 * repair a bad response in a single additional model call. Sometimes it takes a couple of calls
 * to effect a repair and occasionally it won't be able to repair the response at all. If your
 * prompt is well designed and you only occasionally see failed repair attempts, I'd recommend just
 * calling the client a second time. Given the stochastic nature of these models, there's a decent
 * chance it won't make the same mistake on the second call. A well-designed prompt coupled with a
 * well-designed validator should get the reliability of calling these models to somewhere close
 * to 99%.
 *
 * This "feedback" technique works with all the GPT-3 generation of models and I've tested it with
 * `text-davinci-003`, `gpt-3.5-turbo`, and `gpt-4`. There's a good chance it will work with other
 * open source models like `LLaMA` and Googles `Bard` but I have yet to test it with those models.
 *
 * LLMClient supports OpenAI's functions feature and can validate the model's response against the
 * schema for the supported functions. When an LLMClient is configured with both an `OpenAIModel`
 * and a `FunctionResponseValidator`, the model will be cloned and configured to send the
 * validator's configured list of functions with the request. There's no need to separately
 * configure the model's `functions` list, but if you do, the model's functions list will be sent
 * instead.
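 *
 * A minimal usage sketch (assuming `JSONResponseValidator` is exported alongside `LLMClient`;
 * `model` and `template` stand in for a `PromptCompletionModel` and `PromptTemplate` you've
 * constructed elsewhere):
 * ```TypeScript
 * const client = new LLMClient({
 *     model,                                  // e.g. an OpenAIModel instance
 *     template,                               // the prompt template to render
 *     validator: new JSONResponseValidator(), // only parsed JSON objects are returned
 *     max_repair_attempts: 3,                 // the default; set to 0 to disable repair
 *     logRepairs: true                        // log repair attempts to the console
 * });
 * ```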
 * @template TContent Optional. Type of message content returned for a 'success' response. The `response.message.content` field will be of type TContent. Defaults to `any`.
 */
export declare class LLMClient<TContent = any> {
    private readonly _startStreamingMessage;
    private readonly _endStreamHandler;
    private readonly _enableFeedbackLoop;
    private readonly _feedbackLoopType;
    /**
     * Configured options for this LLMClient instance.
     */
    readonly options: ConfiguredLLMClientOptions<TContent>;
    /**
     * Creates a new `LLMClient` instance.
     * @param {LLMClientOptions<TContent>} options - Options to configure the instance with.
     */
    constructor(options: LLMClientOptions<TContent>);
    /**
     * Completes a prompt.
     * @remarks
     * The user's input is read from memory using the configured `input_variable`, so assign it to
     * memory before calling this method. The input is only used when constructing a user message
     * that will be added to the conversation history and formatted like
     * `{ role: 'user', content: input }`.
     *
     * It's important to note that if you want the user's input sent to the model as part of the
     * prompt, you will need to add a `UserMessage` section to your prompt. The client does not
     * modify your prompt, except when performing repairs, and those changes are temporary.
     *
     * When the model successfully returns a valid (or repaired) response, a 'user' message (if
     * input was detected) and an 'assistant' message will be automatically added to the
     * conversation history. You can disable that behavior by setting `max_history_messages` to `0`.
     *
     * The response returned by `completePrompt()` will be strongly typed by the validator you're
     * using. The `DefaultResponseValidator` returns a `string`, and the `JSONResponseValidator`
     * will return either an `object` or, if a JSON Schema is provided, an instance of `TContent`.
     * A custom validator can return any type of content it likes.
     *
     * A successful response is indicated by `response.status == 'success'`, and the content can be
     * accessed via `response.message.content`. If a response is invalid, it will have a
     * `response.status == 'invalid_response'` and `response.message` will be a string containing
     * the validator's feedback message. There are other status codes for various errors and, in
     * all cases except `success`, `response.message` will be of type `string`.
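     *
     * A sketch of handling the result (assuming `client`, `context`, `memory`, and `functions`
     * come from the surrounding turn handler):
     * ```TypeScript
     * const response = await client.completePrompt(context, memory, functions);
     * if (response.status == 'success') {
     *     // response.message.content is typed as TContent on success.
     *     const content = response.message?.content;
     * } else {
     *     // For every other status, response.message is a string describing the problem.
     *     console.error(`${response.status}: ${response.message}`);
     * }
     * ```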
     * @template TContent Optional. Type of message content returned for a 'success' response. The `response.message.content` field will be of type TContent. Defaults to `any`.
     * @param {TurnContext} context - Current turn context.
     * @param {Memory} memory - An interface for accessing state values.
     * @param {PromptFunctions} functions - Functions to use when rendering the prompt.
     * @returns {Promise<PromptResponse<TContent>>} A `PromptResponse` with the status and message.
     */
    completePrompt(context: TurnContext, memory: Memory, functions: PromptFunctions): Promise<PromptResponse<TContent>>;
    /**
     * @param {TurnContext} context - Current turn context.
     * @param {Memory} memory - An interface for accessing state values.
     * @param {PromptFunctions} functions - Functions to use when rendering the prompt.
     * @returns {Promise<PromptResponse<TContent>>} A `PromptResponse` with the status and message.
     * @private
     */
    callCompletePrompt(context: TurnContext, memory: Memory, functions: PromptFunctions): Promise<PromptResponse<TContent>>;
    /**
     * @param {Memory} memory - Current memory.
     * @param {string} variable - Name of the memory variable the conversation history is stored under.
     * @param {Message<any> | Message<any>[]} message - The Message to be added to history.
     * @private
     */
    private addMessageToHistory;
    /**
     * @param {TurnContext} context - The current TurnContext
     * @param {MemoryFork} fork - The current fork of memory to be repaired.
     * @param {PromptFunctions} functions - Functions to use.
     * @param {PromptResponse<TContent>} response - The response that needs repairing.
     * @param {Validation} validation - The Validation object to be used during repair.
     * @param {number} remaining_attempts - The number of remaining attempts.
     * @returns {Promise<PromptResponse<TContent>>} - The repaired response.
     * @private
     */
    private repairResponse;
}
//# sourceMappingURL=LLMClient.d.ts.map