import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp"; import { LLM, BaseLLMCallOptions, BaseLLMParams } from "./base.js"; /** * Note that the modelPath is the only required parameter. For testing you * can set this in the environment variable `LLAMA_PATH`. */ export interface LlamaCppInputs extends BaseLLMParams { /** Prompt processing batch size. */ batchSize?: number; /** Text context size. */ contextSize?: number; /** Embedding mode only. */ embedding?: boolean; /** Use fp16 for KV cache. */ f16Kv?: boolean; /** Number of layers to store in VRAM. */ gpuLayers?: number; /** The llama_eval() call computes all logits, not just the last one. */ logitsAll?: boolean; /** If true, reduce VRAM usage at the cost of performance. */ lowVram?: boolean; /** Path to the model on the filesystem. */ modelPath: string; /** If null, a random seed will be used. */ seed?: null | number; /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */ temperature?: number; /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */ topK?: number; /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */ topP?: number; /** Force system to keep model in RAM. */ useMlock?: boolean; /** Use mmap if possible. */ useMmap?: boolean; /** Only load the vocabulary, no weights. */ vocabOnly?: boolean; } export interface LlamaCppCallOptions extends BaseLLMCallOptions { /** The maximum number of tokens the response should contain. */ maxTokens?: number; /** A function called when matching the provided token array */ onToken?: (tokens: number[]) => void; } /** * To use this model you need to have the `node-llama-cpp` module installed. * This can be installed using `npm install -S node-llama-cpp` and the minimum * version supported in version 2.0.0. * This also requires that have a locally built version of Llama2 installed. */ export declare class LlamaCpp extends LLM { CallOptions: LlamaCppCallOptions; static inputs: LlamaCppInputs; batchSize?: number; contextSize?: number; embedding?: boolean; f16Kv?: boolean; gpuLayers?: number; logitsAll?: boolean; lowVram?: boolean; seed?: null | number; useMlock?: boolean; useMmap?: boolean; vocabOnly?: boolean; modelPath: string; _model: LlamaModel; _context: LlamaContext; _session: LlamaChatSession; static lc_name(): string; constructor(inputs: LlamaCppInputs); _llmType(): string; /** @ignore */ _call(prompt: string, options?: this["ParsedCallOptions"]): Promise; }