import type { Generate } from "@llama-node/llama-cpp"; import { LLM } from "llama-node"; import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js"; import path from "path"; const model = path.resolve(process.cwd(), "../ggml-vic7b-q5_1.bin"); const llama = new LLM(LLamaCpp); const config: LoadConfig = { modelPath: model, enableLogging: true, nCtx: 1024, seed: 0, f16Kv: false, logitsAll: false, vocabOnly: false, useMlock: false, embedding: false, useMmap: true, nGpuLayers: 0 }; const template = `How are you?`; const prompt = `A chat between a user and an assistant. USER: ${template} ASSISTANT:`; const params: Generate = { nThreads: 4, nTokPredict: 2048, topK: 40, topP: 0.1, temp: 0.2, repeatPenalty: 1, prompt, }; const run = async () => { await llama.load(config); await llama.createCompletion(params, (response) => { process.stdout.write(response.token); }); }; run();