import { spawn } from "node:child_process";
import { log } from "node:console";
import { createWriteStream } from "node:fs";
import { basename } from "node:path";
import { exit } from "node:process";
import chalk from "chalk";
import { isDefined } from "inferred-types";
import { DEFAULT_MODEL_DIR } from "../constants";
import { chooseModel } from "../questions";
import { bye, getLocalModels, isPortInUse } from "../util";

/** Heading shown in the interactive prompt for each model role. */
const ROLE_TITLES = {
  draft: "Draft Model",
  main: "Main Model",
} as const;

type ModelRole = keyof typeof ROLE_TITLES;

/**
 * Resolves the model to use for one role of the speculative pair.
 *
 * - When `requested` is given and `getLocalModels` returns at least one
 *   match, the first match is used.
 * - When `requested` is given but nothing matches, the user is asked to pick
 *   a model via `chooseModel`, seeded with the requested name minus ".gguf".
 * - When `requested` is missing, the user is asked to pick via `chooseModel`.
 *
 * If the prompt yields no model, `bye()` is called and the process exits.
 *
 * @param requested - model name taken from the command line, if any
 * @param role - which side of the pair is being resolved
 * @returns the resolved model (first local match or the prompted choice)
 */
async function resolveModel(
  requested: string | undefined,
  role: ModelRole,
): Promise<string> {
  // The explicit `undefined` comparison lets the compiler narrow `requested`;
  // `isDefined` is kept as the runtime check the command has always used.
  if (requested === undefined || !isDefined(requested)) {
    log(`\n- no ${chalk.bold.blue(ROLE_TITLES[role])} specified, choose from the following:\n`);
    const chosen = await chooseModel();
    if (!chosen) {
      bye();
      exit();
    }
    return chosen;
  }

  const localModels = await getLocalModels(requested);
  if (localModels.length > 0) {
    const resolved = localModels[0];
    log(`- using ${chalk.bold(basename(resolved))} as ${chalk.italic(role)} model.`);
    return resolved;
  }

  log(`- the model "${requested}" wasn't found locally`);
  log(`- the current model directory is: ${chalk.blue(DEFAULT_MODEL_DIR)}`);
  log();
  log(`Did you mean one of these models?`);
  const suggestion = await chooseModel(requested.replace(".gguf", ""));
  if (!suggestion) {
    bye();
    exit();
  }
  return suggestion;
}

/**
 * Starts `llama-server` in speculative-decoding mode with a draft model and a
 * main model, writing the server's stdout/stderr to a timestamped log file.
 *
 * Exits with status 1 (after printing an error) when the port is already in
 * use, and exits via `bye()` when the user declines to choose a model.
 *
 * @param args - positional CLI arguments: `[draft, production]`; either may
 *   be omitted, in which case the user is prompted for it
 */
export async function speculate(args: string[]): Promise<void> {
  const [draftArg, productionArg] = args;

  // Resolve the draft first, then the main model, so prompts appear in that
  // order. Each role is resolved into its own variable: a found main model
  // must never overwrite the draft.
  const draft = await resolveModel(draftArg, "draft");
  const production = await resolveModel(productionArg, "main");

  const port = 8087;
  if (await isPortInUse(port)) {
    log(`${chalk.red("Error:")} Port ${port} is already in use. Please use a different port.`);
    exit(1);
  }

  log(`\nStarting speculative LLM using ${chalk.green("llama.cpp")}`);

  const params = [
    `-m`, production,
    `-md`, draft,
    `-c`, "4096",
    `-cd`, "4096",
    `-ngl`, "99",
    `--draft-max`, "8",
    `--draft-min`, "4",
    `--draft-p-min`, "0.9",
    `--host`, "0.0.0.0",
    `--port`, port.toString(),
  ];

  const logFilePath = `llama-server-${Date.now()}.log`;
  const logStream = createWriteStream(logFilePath, { flags: "a" });

  // NOTE(review): stdout/stderr are pipes owned by this process. The Node
  // docs state a detached child only keeps running after the parent exits
  // when its stdio is not connected to the parent — confirm whether the log
  // file should be handed to the child as a file descriptor instead.
  const child = spawn("llama-server", params, {
    detached: true,
    stdio: ["ignore", "pipe", "pipe"],
  });

  // Pipe STDOUT and STDERR to the log file
  child.stdout?.pipe(logStream);
  child.stderr?.pipe(logStream);

  // Detach the child process so it can continue independently
  child.unref();

  const pid = child.pid;
  log(`- Speculative LLM spawned and running with PID: ${pid}`);
  log(`- Logs are being written to: ${chalk.blue(logFilePath)}`);
  log();
  log(
    `- You can rerun this model combination with:\n ${chalk.blue(
      `model speculate ${basename(draft)} ${basename(production)}`,
    )}`,
  );
  log();
}