import { ResponseFormat } from "./openai_api_protocols";
import { LogitProcessor, InitProgressCallback, LogLevel } from "./types";
/**
 * Conversation template config
 */
export interface ConvTemplateConfig {
    system_template: string;
    system_message: string;
    roles: Record<Role, string>;
    role_templates?: Partial<Record<Role, string>>;
    seps: Array<string>;
    role_content_sep?: string;
    role_empty_sep?: string;
    stop_str: Array<string>;
    system_prefix_token_ids?: Array<number>;
    stop_token_ids: Array<number>;
    add_role_after_system_message?: boolean;
}
export declare enum Role {
    user = "user",
    assistant = "assistant",
    tool = "tool"
}
export declare const DefaultLogLevel: LogLevel;
/**
 * Placeholders that can be used in role templates.
 * For example, a role template of
 * `<<question>> ${MessagePlaceholders.user} <<function>> ${MessagePlaceholders.function}`
 * will have the user message inserted at ${MessagePlaceholders.user}
 * and the function message inserted at ${MessagePlaceholders.function}
 * at run time.
 */
export declare enum MessagePlaceholders {
    system = "{system_message}",
    user = "{user_message}",
    assistant = "{assistant_message}",
    tool = "{tool_message}",
    function = "{function_string}",
    hermes_tools = "{hermes_tools}"
}
/**
 * Information about the tokenizer. Currently, only `token_postproc_method` is used to
 * post-process the token table when using grammar.
 */
export interface TokenizerInfo {
    token_postproc_method: string;
    prepend_space_in_encode: boolean;
    strip_space_in_decode: boolean;
}
/**
 * Config of one chat model, a data structure representing `mlc-chat-config.json`.
 * This only corresponds to the chat-related fields and `tokenizer_files` of `mlc-chat-config.json`.
 * Only these fields affect the conversation at runtime,
 * i.e. the third part of https://llm.mlc.ai/docs/get_started/mlc_chat_config.html.
 *
 * This is initialized in `MLCEngine.reload()` with the model's `mlc-chat-config.json`.
 */
export interface ChatConfig {
    tokenizer_files: Array<string>;
    tokenizer_info?: TokenizerInfo;
    token_table_postproc_method?: string;
    vocab_size: number;
    conv_config?: Partial<ConvTemplateConfig>;
    conv_template: ConvTemplateConfig;
    context_window_size: number;
    sliding_window_size: number;
    attention_sink_size: number;
    repetition_penalty: number;
    frequency_penalty: number;
    presence_penalty: number;
    top_p: number;
    temperature: number;
    bos_token_id?: number;
}
/**
 * Custom options that can be used to override known config values.
 */
export interface ChatOptions extends Partial<ChatConfig> {
}
/**
 * Optional configurations for `CreateMLCEngine()` and `CreateWebWorkerMLCEngine()`.
 *
 * appConfig: Configure the app, including the list of models and whether to use IndexedDB cache.
 * initProgressCallback: A callback for showing the progress of loading the model.
 * logitProcessorRegistry: A registry for stateful logit processors, see `webllm.LogitProcessor`.
 *
 * @note All fields are optional, and `logitProcessorRegistry` is only used by `MLCEngine`,
 * not by other engine variants such as `WebWorkerMLCEngine`.
 */
export interface MLCEngineConfig {
    appConfig?: AppConfig;
    initProgressCallback?: InitProgressCallback;
    logitProcessorRegistry?: Map<string, LogitProcessor>;
    logLevel?: LogLevel;
}
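/*
 * Illustrative sketch (not part of the published typings): how `MLCEngineConfig` and
 * `ChatOptions` are typically combined when creating an engine. The model id below is
 * an assumption for illustration; substitute any id from `prebuiltAppConfig.model_list`.
 *
 *   import { CreateMLCEngine, MLCEngineConfig, ChatOptions } from "@mlc-ai/web-llm";
 *
 *   const engineConfig: MLCEngineConfig = {
 *     initProgressCallback: (report) => console.log(report.text),
 *     logLevel: "INFO",
 *   };
 *   // `ChatOptions` is a Partial<ChatConfig>, so any known config field can be overridden.
 *   const chatOpts: ChatOptions = { temperature: 0.7, top_p: 0.95 };
 *   const engine = await CreateMLCEngine(
 *     "Llama-3.1-8B-Instruct-q4f32_1-MLC", // hypothetical choice of model id
 *     engineConfig,
 *     chatOpts,
 *   );
 */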
/**
 * Config for a single generation.
 * Essentially `ChatConfig` without `tokenizer_files`, `conv_config`, or `conv_template`.
 * We also support additional fields not present in `mlc-chat-config.json` in order to
 * support OpenAI-like APIs.
 *
 * Note that all values are optional. If unspecified, we use whatever values are in the
 * `ChatConfig` initialized during `MLCEngine.reload()`.
 */
export interface GenerationConfig {
    repetition_penalty?: number;
    ignore_eos?: boolean;
    top_p?: number | null;
    temperature?: number | null;
    max_tokens?: number | null;
    frequency_penalty?: number | null;
    presence_penalty?: number | null;
    stop?: string | null | Array<string>;
    n?: number | null;
    logit_bias?: Record<string, number> | null;
    logprobs?: boolean | null;
    top_logprobs?: number | null;
    response_format?: ResponseFormat | null;
    enable_thinking?: boolean | null;
}
export declare function postInitAndCheckGenerationConfigValues(config: GenerationConfig): void;
export declare enum ModelType {
    "LLM" = 0,
    "embedding" = 1,
    "VLM" = 2
}
/**
 * Information for a model.
 * @param model: the Hugging Face link to download the model weights, accepting four formats:
 * - https://huggingface.co/{USERNAME}/{MODEL}, in which case we automatically use the main branch
 * - https://huggingface.co/{USERNAME}/{MODEL}/, in which case we automatically use the main branch
 * - https://huggingface.co/{USERNAME}/{MODEL}/resolve/{BRANCH}
 * - https://huggingface.co/{USERNAME}/{MODEL}/resolve/{BRANCH}/
 * @param model_id: the identifier we use to refer to the model.
 * @param model_lib: link to the model library (wasm file) the model uses.
 * @param overrides: partial ChatConfig to override mlc-chat-config.json; can be used to change KVCache settings.
 * @param vram_required_MB: amount of VRAM in MB required to run the model (can use
 * `utils/vram_requirements` to calculate).
 * @param low_resource_required: whether the model can run on limited devices (e.g. an Android phone).
 * @param buffer_size_required_bytes: required `maxStorageBufferBindingSize`, which differs across devices.
 * @param required_features: features needed to run this model (e.g. shader-f16).
 * @param model_type: the intended use case for the model; if unspecified, defaults to LLM.
 */
export interface ModelRecord {
    model: string;
    model_id: string;
    model_lib: string;
    overrides?: ChatOptions;
    vram_required_MB?: number;
    low_resource_required?: boolean;
    buffer_size_required_bytes?: number;
    required_features?: Array<string>;
    model_type?: ModelType;
}
/**
 * Extra configuration that can be passed when loading the engine.
 *
 * @param model_list: models to be used.
 * @param useIndexedDBCache: if true, will use IndexedDBCache to cache models and other artifacts.
 * If false or unspecified, will use the Cache API. For more information on the two, see:
 * https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser
 *
 * @note The Cache API is more well-tested in WebLLM as of now.
 */
export interface AppConfig {
    model_list: Array<ModelRecord>;
    useIndexedDBCache?: boolean;
}
/**
 * modelVersion: the prebuilt model libraries that the current npm is compatible with, which
 * affects the `model_lib`s in `prebuiltAppConfig`.
 *
 * @note The model version does not have to match the npm version, since not every npm update
 * requires an update of the model libraries.
 */
export declare const modelVersion = "v0_2_48";
export declare const modelLibURLPrefix = "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/";
/**
 * Models that support function calling (i.e. usage of `ChatCompletionRequest.tools`). More to come.
 */
export declare const functionCallingModelIds: string[];
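/*
 * Illustrative sketch (not part of the published typings): a custom `AppConfig` that adds one
 * `ModelRecord` on top of the prebuilt list (`prebuiltAppConfig`, declared below). The Hugging
 * Face URL, model id, and wasm file name are assumptions for illustration only; real values must
 * point at an actual MLC-compiled model and a compatible model library.
 *
 *   import { AppConfig, prebuiltAppConfig, modelLibURLPrefix, modelVersion } from "@mlc-ai/web-llm";
 *
 *   const appConfig: AppConfig = {
 *     useIndexedDBCache: true, // default is the Cache API
 *     model_list: [
 *       ...prebuiltAppConfig.model_list,
 *       {
 *         model: "https://huggingface.co/my-org/MyModel-q4f16_1-MLC", // hypothetical weights repo
 *         model_id: "MyModel-q4f16_1-MLC",
 *         model_lib: modelLibURLPrefix + modelVersion + "/MyModel-q4f16_1-ctx4k_cs1k-webgpu.wasm",
 *         overrides: { context_window_size: 2048 }, // partial ChatConfig override
 *       },
 *     ],
 *   };
 */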
/**
 * Default models and model library mapping to be used if unspecified.
 *
 * @note This is the only source of truth for which prebuilt model libraries are compatible with
 * the current WebLLM npm version.
 */
export declare const prebuiltAppConfig: AppConfig;
//# sourceMappingURL=config.d.ts.map