import { Tensor, type TqdmOnProgress } from '@jsgrad/jsgrad/base';

declare const MODEL_PARAMS: {
  '1B': {
    args: {
      dim: number;
      n_heads: number;
      n_kv_heads: number;
      n_layers: number;
      norm_eps: number;
      rope_theta: number;
      vocab_size: number;
      hidden_dim: number;
    };
    files: number;
  };
  '8B': {
    args: {
      dim: number;
      n_heads: number;
      n_kv_heads: number;
      n_layers: number;
      norm_eps: number;
      rope_theta: number;
      vocab_size: number;
      hidden_dim: number;
    };
    files: number;
  };
  '70B': {
    args: {
      dim: number;
      n_heads: number;
      n_kv_heads: number;
      n_layers: number;
      norm_eps: number;
      rope_theta: number;
      vocab_size: number;
      hidden_dim: number;
    };
    files: number;
  };
};

export type Llama3Size = keyof typeof MODEL_PARAMS;
export type Llama3Quantize = 'int8' | 'nf4' | 'float16';

export type Llama3Constructor = {
  size: Llama3Size;
  quantize?: Llama3Quantize;
  device?: string | string[];
  max_context?: number;
  top_k?: number;
  top_p?: number;
  temperature?: number;
  alpha_f?: number;
  alpha_p?: number;
};

export type Llama3Load = {
  system?: string;
  onProgress?: TqdmOnProgress;
};

export type Llama3StaticLoad = Llama3Constructor & Llama3Load;

export type Llama3Message = {
  role: 'user' | 'assistant';
  content: string;
};

export type Llama3Chat = {
  messages: Llama3Message[];
  onProgress?: TqdmOnProgress;
  onToken?: (res: Llama3Response & { token: string; }) => void;
};

export type Llama3StopReason = 'end_turn';

export type Llama3Usage = {
  input_tokens: number;
  output_tokens: number;
  time_to_first_token: number;
  tokens_per_second: number;
};

export type Llama3Response = {
  message: Llama3Message;
  stop_reason?: Llama3StopReason;
  usage: Llama3Usage;
};

export declare class Llama3 implements Llama3Constructor {
  size: Llama3Size;
  quantize?: Llama3Quantize;
  device: string | string[];
  max_context: number;
  temperature: number;
  top_k: number;
  top_p: number;
  alpha_f: number;
  alpha_p: number;
  private model;
  private tokenizer?;
  private start_pos;
  private last_seen_toks;
  private quantize_embeds;
  private linear;
  private embedding;
  constructor(args: Llama3Constructor);
  // NOTE: the `Promise<...>` type arguments below were stripped in the source
  // (they read as bare `Promise` / `Promise>`); the arguments and the brief doc
  // comments are inferred from the surrounding types and method names, and may
  // differ from the published declarations.
  /** Loads a single weights file into a state dict. */
  _load: (fn: string, onProgress?: TqdmOnProgress) => Promise<Record<string, Tensor>>;
  /** Downloads the files if needed, then initializes the model and tokenizer. */
  load: ({ onProgress, system }: Llama3Load) => Promise<Llama3>;
  /** Convenience constructor: `new Llama3(args)` followed by `load()`. */
  static load: ({ onProgress, system, ...args }: Llama3StaticLoad) => Promise<Llama3>;
  /** Applies the optional system prompt before the first chat turn. */
  _system: (msg?: string, onProgress?: TqdmOnProgress) => Promise<void>;
  /** Runs one chat turn over `messages`, streaming tokens via `onToken`. */
  chat: ({ messages, onProgress, onToken }: Llama3Chat) => Promise<Llama3Response>;
  /** Feeds `toks` through the model to advance the KV cache without sampling. */
  _prefill: (toks: number[], onProgress?: TqdmOnProgress) => Promise<void>;
  /** Downloads the model files to `dir`. */
  _download: (dir?: string, onProgress?: TqdmOnProgress) => Promise<void>;
  /** One decode step over `input`, returning the next sampled token id. */
  _call: (input: Tensor) => Promise<number>;
}
export {};
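
// --- Usage sketch (not part of the declaration file) ------------------------
// A minimal example of the API above. Assumptions: `Llama3` being importable
// from the package root ('@jsgrad/jsgrad') and a Node runtime (for
// `process.stdout`) are guesses; only the option and response shapes are
// confirmed by the declarations.
//
//   import { Llama3 } from '@jsgrad/jsgrad'; // import path assumed
//
//   // Download (on first use) and load the 1B model with int8 quantization.
//   const llama = await Llama3.load({
//     size: '1B',
//     quantize: 'int8',
//     system: 'You are a concise assistant.',
//   });
//
//   // Run one chat turn, streaming tokens as they are sampled.
//   const res = await llama.chat({
//     messages: [{ role: 'user', content: 'What is a KV cache?' }],
//     onToken: ({ token }) => process.stdout.write(token),
//   });
//
//   console.log(`\n${res.usage.output_tokens} tokens, ` +
//     `${res.usage.tokens_per_second.toFixed(1)} tok/s`);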