import { Embedding, LayerNorm, Linear, Tensor } from '@jsgrad/jsgrad/base';
/**
 * Hyperparameters accepted by the constructors of every model class declared
 * in this file. All fields are plain numbers.
 *
 * NOTE(review): the per-field descriptions below are inferred from the field
 * names only — the implementation is not visible from this declaration file.
 * Confirm against the source module before relying on them.
 */
type GPTConfig = {
    /** Presumably the maximum sequence (context) length — TODO confirm. */
    block_size: number;
    /** Presumably the number of tokens in the tokenizer vocabulary — TODO confirm. */
    vocab_size: number;
    /** Presumably `vocab_size` rounded up to a padded size — TODO confirm how it is used. */
    padded_vocab_size: number;
    /** Presumably the number of stacked `Block` layers — TODO confirm. */
    n_layer: number;
    /** Presumably the number of attention heads — TODO confirm. */
    n_head: number;
    /** Presumably the embedding / hidden dimension — TODO confirm. */
    n_embd: number;
};
/**
 * Ambient declaration of the file-local `CausalSelfAttention` module
 * (declared here but not exported).
 *
 * Only the public shape is visible: two `Linear` layers, two numeric
 * settings, a `Tensor` named `bias`, and a `call` function mapping a
 * `Tensor` to a `Tensor`.
 */
declare class CausalSelfAttention {
    /** `Linear` layer; presumably the fused query/key/value projection — TODO confirm. */
    c_attn: Linear;
    /** `Linear` layer; presumably the output projection — TODO confirm. */
    c_proj: Linear;
    /** Numeric setting; presumably copied from `config.n_head` — TODO confirm. */
    n_head: number;
    /** Numeric setting; presumably copied from `config.n_embd` — TODO confirm. */
    n_embd: number;
    /**
     * NOTE(review): despite the name, this is likely the causal attention mask
     * rather than an additive bias — verify against the implementation.
     */
    bias: Tensor;
    /**
     * @param config Model hyperparameters.
     */
    constructor(config: GPTConfig);
    /**
     * Forward pass, declared as a function-typed instance property rather
     * than a prototype method.
     *
     * @param x Input tensor (shape not specified by this declaration).
     * @returns Output tensor.
     */
    call: (x: Tensor) => Tensor;
}
/**
 * Ambient declaration of the file-local `MLP` module (declared here but not
 * exported): two `Linear` layers and a `call` function mapping a `Tensor` to
 * a `Tensor`.
 */
declare class MLP {
    /** `Linear` layer; presumably the first (expanding) projection — TODO confirm. */
    c_fc: Linear;
    /** `Linear` layer; presumably the second (output) projection — TODO confirm. */
    c_proj: Linear;
    /**
     * @param config Model hyperparameters.
     */
    constructor(config: GPTConfig);
    /**
     * Forward pass, declared as a function-typed instance property.
     * The activation applied between the two layers, if any, is not visible
     * from this declaration.
     *
     * @param x Input tensor (shape not specified by this declaration).
     * @returns Output tensor.
     */
    call: (x: Tensor) => Tensor;
}
/**
 * Ambient declaration of the file-local `Block` module (declared here but not
 * exported). It holds two `LayerNorm` instances, one `CausalSelfAttention`
 * and one `MLP`.
 *
 * NOTE(review): how these are composed (e.g. pre-norm residual connections)
 * is not visible here — confirm in the implementation.
 */
declare class Block {
    /** `LayerNorm`; presumably applied before `attn` — TODO confirm. */
    ln_1: LayerNorm;
    /** Attention sub-module. */
    attn: CausalSelfAttention;
    /** `LayerNorm`; presumably applied before `mlp` — TODO confirm. */
    ln_2: LayerNorm;
    /** Feed-forward sub-module. */
    mlp: MLP;
    /**
     * @param config Model hyperparameters.
     */
    constructor(config: GPTConfig);
    /**
     * Forward pass, declared as a function-typed instance property.
     *
     * @param x Input tensor (shape not specified by this declaration).
     * @returns Output tensor.
     */
    call: (x: Tensor) => Tensor;
}
/**
 * Ambient declaration of the `GPT` model class — the only symbol this file
 * exports.
 *
 * It exposes the configuration, two `Embedding` tables, an array of `Block`
 * layers, a final `LayerNorm`, a `Linear` head, and three function-typed
 * instance properties: `load_pretrained`, `generate` and `call`.
 */
export declare class GPT {
    /** Hyperparameters associated with this model instance. */
    config: GPTConfig;
    /** `Embedding`; presumably the token embedding table — TODO confirm. */
    wte: Embedding;
    /** `Embedding`; presumably the position embedding table — TODO confirm. */
    wpe: Embedding;
    /** The model's `Block` layers (presumably `config.n_layer` of them — TODO confirm). */
    h: Block[];
    /** `LayerNorm`; presumably applied after the last block — TODO confirm. */
    ln_f: LayerNorm;
    /** `Linear`; presumably projects hidden states to vocabulary logits — TODO confirm. */
    lm_head: Linear;
    /**
     * @param config Model hyperparameters.
     */
    constructor(config: GPTConfig);
    /**
     * Asynchronously loads pretrained weights. Takes no arguments, so the
     * weight source is decided by the implementation (not visible here).
     *
     * @returns A promise that resolves with no value when loading completes.
     */
    load_pretrained: () => Promise<void>;
    /**
     * Generates a continuation from the given input tensor.
     *
     * @param idx Input tensor (presumably token indices — TODO confirm shape/dtype).
     * @param max_new_tokens Presumably the number of tokens to generate — TODO confirm.
     * @param temperature Optional; default value is not visible from this declaration.
     * @param top_k Optional; default value is not visible from this declaration.
     * @returns A tensor (presumably the input extended with generated tokens — TODO confirm).
     */
    generate: (idx: Tensor, max_new_tokens: number, temperature?: number, top_k?: number) => Tensor;
    /**
     * Forward pass.
     *
     * @param idx Input tensor (presumably token indices — TODO confirm).
     * @param targets Optional tensor (presumably target token indices — TODO confirm).
     * @returns A tuple whose first element is always a tensor and whose second
     * element is optional. Presumably `[logits, loss]`, with `loss` present
     * only when `targets` is given — verify against the implementation.
     */
    call: (idx: Tensor, targets?: Tensor) => [Tensor, Tensor?];
}
// Empty export: keeps this file an ES module, so the non-exported declarations
// above stay file-local instead of becoming globals.
export {};