/**
 * @module teams-ai
 */
/**
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
 */
import { TurnContext } from 'botbuilder';
import { Memory } from '../MemoryFork';
import { PromptCompletionModel, PromptCompletionModelResponseReceivedEvent } from '../models';
import { PromptFunctions, PromptTemplate } from '../prompts';
import { Tokenizer } from '../tokenizers';
import { PromptResponse } from '../types';
import { PromptResponseValidator } from '../validators';
/**
 * Options for an LLMClient instance.
 * @template TContent Optional. Type of message content returned for a 'success' response. The `response.message.content` field will be of type TContent. Defaults to `any`.
 */
export interface LLMClientOptions<TContent = any> {
    /**
     * AI model to use for completing prompts.
     */
    model: PromptCompletionModel;
    /**
     * Prompt to use for the conversation.
     */
    template: PromptTemplate;
    /**
     * Optional. Memory variable used for storing conversation history.
     * @remarks
     * The history will be stored as a `Message[]` and the variable defaults to `conversation.history`.
     */
    history_variable?: string;
    /**
     * Optional. Memory variable used for storing the user's input message.
     * @remarks
     * The user's input is expected to be a `string` but it's optional and defaults to `temp.input`.
     */
    input_variable?: string;
    /**
     * Optional. Maximum number of conversation history messages to maintain.
     * @remarks
     * The number of tokens worth of history included in the prompt is controlled by the
     * `ConversationHistory` section of the prompt. This controls the automatic pruning of the
     * conversation history that's done by the LLMClient instance. This helps keep your memory from
     * getting too big and defaults to a value of `10` (or 5 turns.)
     */
    max_history_messages?: number;
    /**
     * Optional. Maximum number of automatic repair attempts the LLMClient instance will make.
     * @remarks
     * This defaults to a value of `3` and can be set to `0` if you wish to disable repairing of bad responses.
     */
    max_repair_attempts?: number;
    /**
     * Optional. Tokenizer to use when rendering the prompt or counting tokens.
     * @remarks
     * If not specified, a new instance of `GPTTokenizer` will be created. GPT3Tokenizer can be passed in for gpt-3 models.
     */
    tokenizer?: Tokenizer;
    /**
     * Optional. Response validator to use when completing prompts.
     * @remarks
     * If not specified a new instance of `DefaultResponseValidator` will be created. The
     * DefaultResponseValidator returns a `Validation` that says all responses are valid.
     */
    validator?: PromptResponseValidator<TContent>;
    /**
     * Optional. If true, any repair attempts will be logged to the console.
     */
    logRepairs?: boolean;
    /**
     * Optional message to send a client at the start of a streaming response.
     */
    startStreamingMessage?: string;
    /**
     * Optional handler to run when a stream is about to conclude.
     */
    endStreamHandler?: PromptCompletionModelResponseReceivedEvent;
    /**
     * If true, the feedback loop will be enabled for streaming responses.
     */
    enableFeedbackLoop?: boolean;
    /**
     * The type of the feedback loop.
     */
    feedbackLoopType?: 'default' | 'custom';
}
/**
 * The configuration of the LLMClient instance.
 * @template TContent Optional. Type of message content handled by the configured `validator`. Defaults to `any`.
 */
export interface ConfiguredLLMClientOptions<TContent = any> {
    /**
     * AI model used for completing prompts.
     */
    model: PromptCompletionModel;
    /**
     * Memory variable used for storing conversation history.
     */
    history_variable: string;
    /**
     * Memory variable used for storing the user's input message.
     */
    input_variable: string;
    /**
     * Maximum number of conversation history messages that will be persisted to memory.
     */
    max_history_messages: number;
    /**
     * Maximum number of automatic repair attempts that will be made.
     */
    max_repair_attempts: number;
    /**
     * Prompt used for the conversation.
     */
    template: PromptTemplate;
    /**
     * Tokenizer used when rendering the prompt or counting tokens.
     */
    tokenizer: Tokenizer;
    /**
     * Response validator used when completing prompts.
     */
    validator: PromptResponseValidator<TContent>;
    /**
     * If true, any repair attempts will be logged to the console.
     */
    logRepairs: boolean;
}
/**
 * LLMClient class that's used to complete prompts.
 * @remarks
 * Each LLMClient, at a minimum, needs to be configured with a `model` and a `template`.
 *
 * Configuring the client to use a `validator` is optional but recommended. The primary benefit to
 * using LLMClient is its response validation and automatic response repair features. The
 * validator acts as a guard and guarantees that you never get a malformed response back from the
 * model. At least not without it being flagged as an `invalid_response`.
 *
 * Using the `JSONResponseValidator`, for example, guarantees that you only ever get a valid
 * object back from `completePrompt()`. In fact, you'll get back a fully parsed object and any
 * additional response text from the model will be dropped. If you give the `JSONResponseValidator`
 * a JSON Schema, you will get back a strongly typed and validated instance of an object in
 * the returned `response.message.content`.
 *
 * When a validator detects a bad response from the model, it gives the model "feedback" as to the
 * problem it detected with its response and more importantly an instruction that tells the model
 * how it should repair the problem. This puts the client into a special repair mode where it first
 * forks the memory for the conversation and then has a side conversation with the model in an
 * effort to get it to repair its response. By forking the conversation, this isolates the bad
 * response and prevents it from contaminating the main conversation history. If the response can
 * be repaired, the client will un-fork the memory and use the repaired response in place of the
 * original bad response. To the model it's as if it never made a mistake which is important for
 * future turns with the model. If the response can't be repaired, a response status of
 * `invalid_response` will be returned.
 *
 * When using a well designed validator, like the `JSONResponseValidator`, the client can typically
 * repair a bad response in a single additional model call. Sometimes it takes a couple of calls
 * to effect a repair and occasionally it won't be able to repair it at all. If your prompt is
 * well designed and you only occasionally see failed repair attempts, I'd recommend just calling
 * the client a second time. Given the stochastic nature of these models, there's a decent chance
 * it won't make the same mistake on the second call. A well designed prompt coupled with a well
 * designed validator should get the reliability of calling these models somewhere close to 99%
 * reliable.
 *
 * This "feedback" technique works with all the GPT-3 generation of models and I've tested it with
 * `text-davinci-003`, `gpt-3.5-turbo`, and `gpt-4`. There's a good chance it will work with other
 * open source models like `LLaMA` and Google's `Bard` but I have yet to test it with those models.
 *
 * LLMClient supports OpenAI's functions feature and can validate the model's response against the
 * schema for the supported functions. When an LLMClient is configured with both an `OpenAIModel`
 * and a `FunctionResponseValidator`, the model will be cloned and configured to send the
 * validator's configured list of functions with the request. There's no need to separately
 * configure the model's `functions` list, but if you do, the model's functions list will be sent
 * instead.
 * @template TContent Optional. Type of message content returned for a 'success' response. The `response.message.content` field will be of type TContent. Defaults to `any`.
 */
export declare class LLMClient<TContent = any> {
    private readonly _startStreamingMessage;
    private readonly _endStreamHandler;
    private readonly _enableFeedbackLoop;
    private readonly _feedbackLoopType;
    /**
     * Configured options for this LLMClient instance.
     */
    readonly options: ConfiguredLLMClientOptions<TContent>;
    /**
     * Creates a new `LLMClient` instance.
     * @param {LLMClientOptions<TContent>} options - Options to configure the instance with.
     */
    constructor(options: LLMClientOptions<TContent>);
    /**
     * Completes a prompt.
     * @remarks
     * This method does not take an `input` argument. The user's input is expected to be assigned
     * to memory using the configured `input_variable` before calling. The `input` variable is
     * only used when constructing a user message that will be added to the conversation history
     * and formatted like `{ role: 'user', content: input }`.
     *
     * It's important to note that if you want the user's input sent to the model as part of the
     * prompt, you will need to add a `UserMessage` section to your prompt. The client does not do
     * anything to modify your prompt, except when performing repairs and those changes are
     * temporary.
     *
     * When the model successfully returns a valid (or repaired) response, a 'user' message (if
     * input was detected) and 'assistant' message will be automatically added to the conversation
     * history. You can disable that behavior by setting `max_history_messages` to `0`.
     *
     * The response returned by `completePrompt()` will be strongly typed by the validator you're
     * using. The `DefaultResponseValidator` returns a `string` and the `JSONResponseValidator`
     * will return either an `object` or if a JSON Schema is provided, an instance of `TContent`.
     * When using a custom validator, the validator can return any type of content it likes.
     *
     * A successful response is indicated by `response.status == 'success'` and the content can be
     * accessed via `response.message.content`. If a response is invalid it will have a
     * `response.status == 'invalid_response'` and the `response.message` will be a string containing
     * the validator feedback message. There are other status codes for various errors and in all
     * cases except `success` the `response.message` will be of type `string`.
     * @template TContent Optional. Type of message content returned for a 'success' response. The `response.message.content` field will be of type TContent. Defaults to `any`.
     * @param {TurnContext} context - Current turn context.
     * @param {Memory} memory - An interface for accessing state values.
     * @param {PromptFunctions} functions - Functions to use when rendering the prompt.
     * @returns {Promise<PromptResponse<TContent>>} A `PromptResponse` with the status and message.
     */
    completePrompt(context: TurnContext, memory: Memory, functions: PromptFunctions): Promise<PromptResponse<TContent>>;
    /**
     * @param {TurnContext} context - Current turn context.
     * @param {Memory} memory - An interface for accessing state values.
     * @param {PromptFunctions} functions - Functions to use when rendering the prompt.
     * @returns {Promise<PromptResponse<TContent>>} A `PromptResponse` with the status and message.
     * @private
     */
    callCompletePrompt(context: TurnContext, memory: Memory, functions: PromptFunctions): Promise<PromptResponse<TContent>>;
    /**
     * @param {Memory} memory - Current memory.
     * @param {string} variable - Variable to fetch value from memory.
     * @param {Message | Message[]} message - The Message to be added to history.
     * @private
     */
    private addMessageToHistory;
    /**
     * @param {TurnContext} context - The current TurnContext
     * @param {MemoryFork} fork - The current fork of memory to be repaired.
     * @param {PromptFunctions} functions - Functions to use.
     * @param {PromptResponse<TContent>} response - The response that needs repairing.
     * @param {Validation} validation - The Validation object to be used during repair.
     * @param {number} remaining_attempts - The number of remaining attempts.
     * @returns {Promise<PromptResponse<TContent>>} - The repaired response.
     * @private
     */
    private repairResponse;
}
//# sourceMappingURL=LLMClient.d.ts.map