import type { RwkvInvocation } from "@llama-node/rwkv-cpp";
import { LLM } from "llama-node";
import { RwkvCpp, type LoadConfig } from "llama-node/dist/llm/rwkv-cpp.js";
import path from "path";

const modelPath = path.resolve(
    process.cwd(),
    "../ggml-rwkv-4_raven-7b-v9-Eng99%-20230412-ctx8192-Q4_1_0.bin"
);
const tokenizerPath = path.resolve(process.cwd(), "../20B_tokenizer.json");

const rwkv = new LLM(RwkvCpp);

const config: LoadConfig = {
    modelPath,
    tokenizerPath,
    nThreads: 4,
    enableLogging: true,
};

const template = `Who is the president of the United States?`;

const prompt = `Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction: ${template}

### Response:`;

const params: RwkvInvocation = {
    maxPredictLength: 2048,
    topP: 0.1,
    temp: 0.1,
    prompt,
};

const run = async () => {
    const abortController = new AbortController();

    await rwkv.load(config);

    setTimeout(() => {
        abortController.abort();
    }, 3000);

    try {
        await rwkv.createCompletion(
            params,
            (response) => {
                process.stdout.write(response.token);
            },
            abortController.signal
        );
    } catch (e) {
        console.log(e);
    }
};

run();