import { CommandModule } from "yargs";
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
import { BuildGpu, LlamaNuma } from "../../bindings/types.js";
import { SpecializedChatWrapperTypeName } from "../../chatWrappers/utils/resolveChatWrapper.js";
import { ParsedXtcArg } from "../utils/parseXtcArg.js";
import { GgmlType } from "../../gguf/types/GgufTensorInfoTypes.js";

/**
 * Argument shape associated with the chat command module declared below.
 *
 * Required members are always present; members marked `?` may be omitted.
 * NOTE(review): presumably these are the parsed yargs options for the command —
 * this declaration file does not show where the type is consumed, so confirm
 * against the implementation.
 */
type ChatCommand = {
    modelPath?: string;
    header?: string[];
    /** A `BuildGpu` value, or the literal `"auto"`. */
    gpu?: BuildGpu | "auto";
    systemInfo: boolean;
    systemPrompt?: string;
    systemPromptFile?: string;
    prompt?: string;
    promptFile?: string;
    /** A specialized chat wrapper type name, or the literal `"auto"`. */
    wrapper: SpecializedChatWrapperTypeName | "auto";
    noJinja?: boolean;
    contextSize?: number;
    batchSize?: number;
    flashAttention?: boolean;
    /** The literal `"currentQuant"`, or the name of any `GgmlType` member. */
    kvCacheKeyType?: "currentQuant" | keyof typeof GgmlType;
    /** The literal `"currentQuant"`, or the name of any `GgmlType` member. */
    kvCacheValueType?: "currentQuant" | keyof typeof GgmlType;
    swaFullCache?: boolean;
    noTrimWhitespace: boolean;
    /**
     * The literal `"text"`, or any value accepted as the second parameter of
     * `LlamaGrammar.getFor`.
     *
     * `Parameters` is generic and cannot be used without a type argument; the
     * argument is derived from `LlamaGrammar.getFor` so this union stays in sync
     * with that method's signature.
     */
    grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[1];
    jsonSchemaGrammarFile?: string;
    threads?: number;
    temperature: number;
    minP: number;
    topK: number;
    topP: number;
    seed?: number;
    xtc?: ParsedXtcArg;
    gpuLayers?: number;
    repeatPenalty: number;
    lastTokensRepeatPenalty: number;
    penalizeRepeatingNewLine: boolean;
    repeatFrequencyPenalty?: number;
    repeatPresencePenalty?: number;
    dryRepeatPenaltyStrength?: number;
    dryRepeatPenaltyBase?: number;
    dryRepeatPenaltyAllowedLength?: number;
    dryRepeatPenaltyLastTokens?: number;
    maxTokens: number;
    reasoningBudget?: number;
    noHistory: boolean;
    environmentFunctions: boolean;
    tokenPredictionDraftModel?: string;
    tokenPredictionModelContextSize?: number;
    debug: boolean;
    numa?: LlamaNuma;
    meter: boolean;
    timing: boolean;
    noMmap: boolean;
    useDirectIo: boolean;
    printTimings: boolean;
};

/**
 * The yargs command module for the chat command.
 *
 * NOTE(review): the annotation uses `CommandModule`'s default type arguments, so
 * the `ChatCommand` type above is not tied to this module here. Confirm whether
 * `CommandModule<object, ChatCommand>` was intended.
 */
export declare const ChatCommand: CommandModule;

export {};