declare class Tensor {
  requires_grad: boolean;
  _data: Array<any>;
  shape: Array<any>;
  _grad: Tensor;
  children: Array<any>;
  parents: Array<any>;
  operation: any;
  visited: boolean;
  m: Tensor;
  v: Tensor;
  device: string;
  forwardKernel: any;
  backwardKernelA: any;
  backwardKernelB: any;
  batch_size: number | null;
  gpu: any;
  warned: boolean;
  /**
   * Creates new instance of the Tensor class.
   * @param {object} data - Iterable containing the data to be stored in the Tensor.
   * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
   * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
   */
  constructor(data: Array<any> | number, requires_grad?: boolean, device?: string);
  /**
   * Returns the data in the Tensor.
   */
  get data(): Array<any>;
  /**
   * Returns the data's length.
   */
  get length(): number;
  /**
   * Returns the number of dimensions in the Tensor.
   */
  get ndims(): number;
  /**
   * Returns the tensor's gradients.
   */
  get grad(): any[];
  /**
   * Performs backward pass from THIS tensor backwards.
   * Fills the gradients of every tensor that led to this one (and has requires_grad=true) with its gradient relative to THIS tensor.
   */
  backward(grad?: Tensor | null, child?: Tensor | null): void;
  /**
   * Sends this Tensor to the provided device.
   * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
   */
  to(device: string): void;
  /**
   * Resets this Tensor's gradients to zero.
   */
  zero_grad(): void;
  /**
   * Resets the gradients of this Tensor, and of all of the Tensors that led to it.
   */
  zero_grad_graph(): void;
  /**
   * Turns the data in the Tensor into a JavaScript array.
   */
  tolist(): any[];
  /**
   * Gets the sum of the Tensor over a specified dimension.
   * @param {number} dim - Dimension to sum over.
   * @param {boolean} keepdims - Whether to keep dimensions of original tensor.
   * @returns {Tensor} - Final tensor.
   */
  sum(dim?: number, keepdims?: boolean): Tensor;
  /**
   * Gets the mean of the Tensor over a specified dimension.
   * @param {number} dim - Dimension to get mean over.
   * @param {boolean} keepdims - Whether to keep dimensions of original tensor.
   * @returns {Tensor} - Final tensor.
   */
  mean(dim?: number, keepdims?: boolean): Tensor;
  /**
   * Gets the variance of the Tensor over a specified dimension.
   * @param {number} dim - Dimension to get variance over.
   * @param {boolean} keepdims - Whether to keep dimensions of original tensor.
   * @returns {Tensor} - Final tensor.
   */
  variance(dim?: number, keepdims?: boolean): Tensor;
  /**
   * Adds an integer or other Tensor to this Tensor.
   * @param {Tensor | number} other - Tensor or integer to be added to this Tensor.
   * @returns {Tensor} New tensor.
   */
  add(other: Tensor | number): Tensor;
  /**
   * Subtracts an integer or other Tensor from this Tensor.
   * @param {Tensor | number} other - Tensor or integer to be subtracted from this Tensor.
   * @returns {Tensor} New tensor.
   */
  sub(other: Tensor | number): Tensor;
  /**
   * Gets the element-wise opposite of the tensor (every element multiplied by -1).
   * @returns {Tensor} New tensor.
   */
  neg(): Tensor;
  /**
   * Multiplies this Tensor by an integer or other Tensor.
   * @param {Tensor | number} other - Tensor or integer to multiply this Tensor by.
   * @returns {Tensor} New tensor.
   */
  mul(other: Tensor | number): Tensor;
  /**
   * Divides this Tensor by an integer or other Tensor.
   * @param {Tensor | number} other - Tensor or integer to divide this Tensor by.
   * @returns {Tensor} New tensor.
   */
  div(other: Tensor | number): Tensor;
  /**
   * Performs matrix multiplication between this Tensor and another Tensor.
   * @param {Tensor} other - Tensor to matrix-multiply this Tensor by.
   * @returns {Tensor} New tensor.
   */
  matmul(other: Tensor): Tensor;
  /**
   * Raises the tensor to the element-wise power of n.
   * @param {number} n - Exponent.
   * @returns {Tensor} New tensor.
   */
  pow(n: number): Tensor;
  /**
   * Gets the element-wise square root of the tensor.
   * @returns {Tensor} New tensor.
   */
  sqrt(): Tensor;
  /**
   * Gets the element-wise exponentiation of the tensor (e^(every element)).
   * @returns {Tensor} New tensor.
   */
  exp(): Tensor;
  /**
   * Gets the element-wise natural log of the tensor (ln(every element)).
   * @returns {Tensor} New tensor.
   */
  log(): Tensor;
  /**
   * Transposes the tensor along two consecutive dimensions.
   * @param {number} dim1 - First dimension.
   * @param {number} dim2 - Second dimension.
   * @returns {Tensor} New tensor.
   */
  transpose(dim1: number, dim2: number): Tensor;
  /**
   * Returns the elements of the tensor at [index1], or at [index1][index2].
   * @param {Tensor | Array} index1 - List containing indexes to extract data from in first dimension.
   * @param {Tensor | Array} index2 - List containing indexes to extract data from in second dimension [OPTIONAL].
   * @returns {Tensor} New tensor.
   * @example
   * let a = tensor([[1,1,2,3],
   *                 [6,7,8,9]])
   *
   * // Returns tensor([2,6,9]):
   * a.at([0,1,1], [2,0,3])
   *
   * // Returns tensor([[1,1,2,3],
   * //                 [6,7,8,9],
   * //                 [1,1,2,3]]):
   * a.at([0,1,0])
   */
  at(index1: Tensor | Array<any>, index2?: Tensor | Array<any>): Tensor;
  /**
   * Fills elements of this Tensor with "value" wherever "condition" returns true for the corresponding element of the "mask" Tensor.
   * @param {Tensor} mask - "condition" will be applied to this tensor element-wise.
   * @param {function} condition - Function that returns true or false element-wise.
   * @param {number} value - Value to fill the Tensor with, where the condition is met.
   * @returns {Tensor} New tensor.
   * @example
   * let a = tensor([[1,5,2,3],
   *                 [6,7,2,9]])
   * let mask = tensor([[1,5,2,3],
   *                    [6,7,2,9]])
   *
   * // Returns tensor([[1,0,2,3],
   * //                 [0,0,2,0]]):
   * a.masked_fill(mask, (el) => {return el > 3}, 0)
   */
  masked_fill(mask: Tensor, condition: (someArg: number) => boolean, value: number): Tensor;
  /**
   * Reshapes the tensor into the new shape.
   * @param {object} shape - New tensor's shape.
   * @returns {Tensor} New tensor.
   */
  reshape(shape: Array<any>): Tensor;
}
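/*
 * Usage sketch (illustrative, not part of the declarations): a minimal autograd
 * round trip through the Tensor API above. This assumes the runtime implementation
 * matches these typings; the variable names are hypothetical.
 *
 *   const x = tensor([[1, 2], [3, 4]]);          // leaf tensor, gradients not tracked
 *   const w = tensor([[0.5], [0.5]], true);      // requires_grad = true
 *   const out = x.matmul(w);                     // (2x2) @ (2x1) -> (2x1), tracked op
 *   const loss = out.sum();                      // reduce to a scalar before backward()
 *   loss.backward();                             // fills w's gradients (dLoss/dW)
 *   w.zero_grad();                               // clear gradients between steps
 */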
declare class Parameter extends Tensor {
  /**
   * Creates new Parameter (an instance of the Tensor class that always tracks gradients).
   * @param {object} data - Iterable containing the data to be stored in the Tensor.
   */
  constructor(data: Array<any> | number);
}
/**
 * Gets the mean of the Tensor over a specified dimension.
 * @param {Tensor} a - Original Tensor.
 * @param {number} dim - Dimension to get mean over.
 * @param {boolean} keepdims - Whether to keep dimensions of original tensor.
 * @returns {Tensor} - Final tensor.
 */
declare function mean(a: Tensor, dim?: number, keepdims?: boolean): Tensor;
/**
 * Gets the variance of the Tensor over a specified dimension.
 * @param {Tensor} a - Original Tensor.
 * @param {number} dim - Dimension to get variance over.
 * @param {boolean} keepdims - Whether to keep dimensions of original tensor.
 * @returns {Tensor} - Final tensor.
 */
declare function variance(a: Tensor, dim?: number, keepdims?: boolean): Tensor;
/**
 * Adds an integer or other Tensor to the given Tensor.
 * @param {Tensor} a - Original Tensor.
 * @param {Tensor | number} b - Tensor or integer to be added to the original Tensor.
 * @returns {Tensor} New tensor.
 */
declare function add(a: Tensor, b: Tensor | number): Tensor;
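/*
 * Usage sketch (illustrative): dimensional reductions with the functional API above.
 * Result shapes assume the PyTorch-style keepdims semantics the docs describe.
 *
 *   const a = tensor([[1, 2, 3], [4, 5, 6]]);    // shape [2, 3]
 *   const m0 = mean(a, 0);                       // shape [3]: column means
 *   const m1 = mean(a, 1, true);                 // shape [2, 1]: row means, dims kept
 *   const v  = variance(a, 1);                   // shape [2]: row variances
 */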
/**
 * Gets the element-wise opposite of the given tensor (every element multiplied by -1).
 * @param {Tensor} a - Original Tensor.
 * @returns {Tensor} New tensor.
 */
declare function neg(a: Tensor): Tensor;
/**
 * Multiplies the given Tensor by an integer or other Tensor.
 * @param {Tensor} a - Original Tensor.
 * @param {Tensor | number} b - Tensor or integer to multiply the original Tensor by.
 * @returns {Tensor} New tensor.
 */
declare function mul(a: Tensor, b: Tensor | number): Tensor;
/**
 * Divides the given Tensor by an integer or other Tensor.
 * @param {Tensor} a - Original Tensor.
 * @param {Tensor | number} b - Tensor or integer to divide the original Tensor by.
 * @returns {Tensor} New tensor.
 */
declare function div(a: Tensor, b: Tensor | number): Tensor;
/**
 * Raises the tensor to the element-wise power of n.
 * @param {Tensor} a - Tensor to be raised to the power of n.
 * @param {number} n - Exponent.
 * @returns {Tensor} New tensor.
 */
declare function pow(a: Tensor, n: number): Tensor;
/**
 * Gets the element-wise square root of the given tensor.
 * @param {Tensor} a - Tensor to take the square root of.
 * @returns {Tensor} New tensor.
 */
declare function sqrt(a: Tensor): Tensor;
/**
 * Gets the element-wise exponentiation of the given tensor (e^(every element)).
 * @param {Tensor} a - Tensor to be exponentiated.
 * @returns {Tensor} New tensor.
 */
declare function exp(a: Tensor): Tensor;
/**
 * Gets the element-wise natural log of the given tensor (ln(every element)).
 * @param {Tensor} a - Tensor to take the log of.
 * @returns {Tensor} New tensor.
 */
declare function log(a: Tensor): Tensor;
/**
 * Performs matrix multiplication between two Tensors.
 * @param {Tensor} a - First tensor.
 * @param {Tensor} b - Second tensor.
 * @returns {Tensor} New tensor.
 */
declare function matmul(a: Tensor, b: Tensor): Tensor;
/**
 * Transposes the tensor along two consecutive dimensions.
 * @param {Tensor} a - Tensor to be transposed.
 * @param {number} dim1 - First dimension.
 * @param {number} dim2 - Second dimension.
 * @returns {Tensor} New tensor.
 */
declare function transpose(a: Tensor, dim1: number, dim2: number): Tensor;
/**
 * Returns the elements of the tensor at [idx1], or at [idx1][idx2].
 * @param {Tensor} a - Original Tensor.
 * @param {Tensor | Array} idx1 - List containing indexes to extract data from in first dimension.
 * @param {Tensor | Array} idx2 - List containing indexes to extract data from in second dimension [OPTIONAL].
 * @returns {Tensor} New tensor.
 * @example
 * let a = tensor([[1,4,2],
 *                 [6,7,8]])
 *
 * // Returns tensor([[1,4,2],
 * //                 [6,7,8],
 * //                 [1,4,2]]):
 * at(a, [0,1,0])
 *
 * // Returns tensor([2,6,8]):
 * at(a, [0,1,1], [2,0,2])
 */
declare function at(a: Tensor, idx1: Tensor | Array<any>, idx2: Tensor | Array<any>): Tensor;
/**
 * Fills elements of the "a" Tensor with "value" wherever "condition" returns true for the corresponding element of the "mask" Tensor.
 * @param {Tensor} a - Original Tensor.
 * @param {Tensor} mask - "condition" will be applied to this tensor element-wise.
 * @param {function} condition - Function that returns true or false element-wise.
 * @param {number} value - Value to fill the Tensor with, where the condition is met.
 * @returns {Tensor} New tensor.
 * @example
 * let a = tensor([[1,5,2,3],
 *                 [6,7,2,9]])
 * let mask = tensor([[1,5,2,3],
 *                    [6,7,2,9]])
 *
 * // Returns tensor([[1,0,2,3],
 * //                 [0,0,2,0]]):
 * masked_fill(a, mask, (el) => {return el > 3}, 0)
 */
declare function masked_fill(a: Tensor, mask: Tensor, condition: (someArg: number) => boolean, value: number): Tensor;
/**
 * Reshapes the tensor into the new shape.
 * @param {Tensor} a - Tensor to be reshaped.
 * @param {object} shape - New tensor's shape.
 * @returns {Tensor} New tensor.
 */
declare function reshape(a: Tensor, shape: Array<any>): Tensor;
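/*
 * Usage sketch (illustrative): composing the functional ops above. The Tensor
 * method forms (a.add(b), a.matmul(b), ...) and these free functions mirror
 * each other, as the declarations show.
 *
 *   const a = tensor([[1, 2], [3, 4]]);
 *   const b = tensor([[5, 6], [7, 8]]);
 *   const c = add(mul(a, 2), b);                 // 2*a + b, element-wise
 *   const d = matmul(a, transpose(b, 0, 1));     // a @ b^T
 *   const e = reshape(d, [4]);                   // flatten the 2x2 result
 */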
declare function _reshape(a: Array<any>, shape: number[]): any[];
/**
 * Creates new instance of the Tensor class.
 * @param {object} data - Iterable containing the data to be stored in the Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
 * @returns {Tensor} New tensor.
 */
declare function tensor(data: Array<any>, requires_grad?: boolean, device?: string): Tensor;
/**
 * Creates new instance of the Tensor class filled with only zeros.
 * @param {object} shape - List containing the shape of the new Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
 * @returns {Tensor} New tensor.
 */
declare function zeros(shape: Array<any>, requires_grad?: boolean, device?: string): Tensor;
/**
 * Creates new instance of the Tensor class filled with only ones.
 * @param {object} shape - List containing the shape of the new Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
 * @returns {Tensor} New tensor.
 */
declare function ones(shape: Array<any>, requires_grad?: boolean, device?: string): Tensor;
/**
 * Creates new instance of a lower-triangular 2D Tensor.
 * @param {object} shape - List containing the shape of the new Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
 * @returns {Tensor} New tensor.
 */
declare function tril(shape: Array<any>, requires_grad?: boolean, device?: string): Tensor;
/**
 * Creates new instance of the Tensor class filled with numbers drawn from a uniform distribution on the open interval (0,1).
 * @param {object} shape - List containing the shape of the new Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
 * @returns {Tensor} New tensor.
 */
declare function rand(shape: Array<any>, requires_grad?: boolean, device?: string): Tensor;
/**
 * Creates new instance of the Tensor class filled with numbers drawn from a normal distribution.
 * @param {object} shape - List containing the shape of the new Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @param {string} device - Device to store Tensor. Either "gpu" or "cpu".
 * @param {boolean} xavier - Whether to use Xavier initialization (divide by square root of first input dimension).
 * @returns {Tensor} New tensor.
 */
declare function randn(shape: Array<any>, requires_grad?: boolean, device?: string, xavier?: boolean): Tensor;
/**
 * Creates new instance of the Tensor class filled with random integers between low and high.
 * @param {number} low - Lowest number that can be sampled.
 * @param {number} high - One above the highest number that can be sampled.
 * @param {object} shape - List containing the shape of the new Tensor.
 * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients.
 * @returns {Tensor} New tensor.
 */
declare function randint(low?: number, high?: number, shape?: number[], requires_grad?: boolean): Tensor;
/**
 * Broadcasts tensor "a" into the shape of "b".
 * If the shape gets smaller, the tensor will be summed. If it gets larger, the tensor will be expanded.
 * @param {Tensor} a - First tensor, will be broadcast into the shape of the second.
 * @param {Tensor} b - Second tensor.
 * @returns {Tensor} New tensor.
 * @example
 * // Returns tensor with shape [4,3,2]:
 * broadcast(randn([3,2]), randn([4,3,2]));
 *
 * // Returns tensor with shape [4,5,3,1]:
 * broadcast(ones([5,3,2]), ones([4,5,3,1]));
 */
declare function broadcast(a: Tensor, b: Tensor): Tensor;
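/*
 * Usage sketch (illustrative): the factory functions above.
 *
 *   const z = zeros([2, 3]);                       // 2x3 tensor of zeros
 *   const o = ones([2, 3], true);                  // 2x3 tensor of ones, gradients tracked
 *   const n = randn([64, 128], true, "cpu", true); // Xavier-scaled normal init
 *   const i = randint(0, 10, [4]);                 // 4 integers sampled from [0, 10)
 *   const m = tril([8, 8]);                        // lower-triangular 2D tensor, e.g. an attention mask
 */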
interface ModuleInterface {
  [key: string]: Module | Parameter | Tensor | any;
  parameters(): (Parameter | Tensor)[];
  train(): void;
  eval(): void;
  entries(): [string, Module | Parameter | Tensor | any][];
  mode: "train" | "eval";
}
declare class Module implements ModuleInterface {
  [key: string]: Module | Parameter | Tensor | any;
  mode: "train" | "eval";
  /**
   * Returns all model parameters in a list.
   * @returns {object} List with parameters in the model.
   */
  parameters(): (Parameter | Tensor)[];
  /**
   * Sets the module's mode to train, which influences layers like Dropout.
   */
  train(): void;
  /**
   * Sets the module's mode to eval, which influences layers like Dropout.
   */
  eval(): void;
  /**
   * Returns an array of key/value pairs of the enumerable properties of the Module.
   * @returns {object} List with parameters in the model.
   */
  entries(): [string, Module | Parameter | Tensor | any][];
}
declare class Linear extends Module {
  W: Tensor;
  b: Tensor;
  has_bias: boolean;
  /**
   * Simple linear layer, with weight matrix and optional bias. Does not contain a nonlinearity.
   *
   * @param {number} in_size - Size of the last dimension of the input array.
   * @param {number} out_size - Size of the last dimension of the output array.
   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
   * @param {boolean} bias - Whether to include a bias term.
   * @param {boolean} xavier - Whether to use Xavier initialization (divide by square root of first input dimension).
   */
  constructor(in_size: number, out_size: number, device?: string, bias?: boolean, xavier?: boolean);
  /**
   * Performs forward pass through the Linear layer.
   * @param {Tensor} x - Input Tensor.
   * @returns {Tensor} New Tensor. Out = (In @ W) + b.
   */
  forward(x: Tensor): Tensor;
}
declare class MultiHeadSelfAttention extends Module {
  Wk: Linear;
  Wq: Linear;
  Wv: Linear;
  residual_proj: Linear;
  mask: Tensor;
  att_dropout: Dropout;
  residual_dropout: Dropout;
  softmax: Softmax;
  H: number;
  /**
   * Multi Head Self-Attention layer implementation.
   *
   * @param {number} in_size - Size of the last dimension of the input array.
   * @param {number} out_size - Size of the last dimension of the output array.
   * @param {number} n_heads - Number of parallel heads to be computed (must equally divide in_size).
   * @param {number} n_timesteps - Length of the text sequence to be processed by the Transformer.
   * @param {number} dropout_prob - Probability of zeroing each activation in the Dropout layer.
   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
   */
  constructor(in_size: number, out_size: number, n_heads: number, n_timesteps: number, dropout_prob?: number, device?: string);
  /**
   * Performs Multi Head Self-Attention on the "x" tensor.
   * @param {Tensor} x - Input Tensor.
   * @returns {Tensor} New Tensor.
   */
  forward(x: Tensor): Tensor;
}
declare class FullyConnected extends Module {
  l1: Linear;
  relu: ReLU;
  l2: Linear;
  dropout: Dropout;
  /**
   * Small block composed of two Linear layers, a ReLU nonlinearity and a Dropout layer.
   *
   * @param {number} in_size - Size of the last dimension of the input array.
   * @param {number} out_size - Size of the last dimension of the output array.
   * @param {number} dropout_prob - Probability of zeroing each activation in the Dropout layer.
   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
   * @param {boolean} bias - Whether to include a bias term.
   */
  constructor(in_size: number, out_size: number, dropout_prob?: number, device?: string, bias?: boolean);
  /**
   * Passes the "x" tensor through the Fully Connected layers.
   * @param {Tensor} x - Input Tensor.
   * @returns {Tensor} New Tensor.
   */
  forward(x: Tensor): Tensor;
}
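/*
 * Usage sketch (illustrative): composing the layers above into a custom Module.
 * This assumes, as in PyTorch, that layers assigned as properties are collected
 * by parameters(); the class and variable names here are hypothetical.
 *
 *   class MLP extends Module {
 *     l1: Linear;
 *     relu: ReLU;
 *     l2: Linear;
 *     constructor() {
 *       super();
 *       this.l1 = new Linear(784, 128);
 *       this.relu = new ReLU();
 *       this.l2 = new Linear(128, 10);
 *     }
 *     forward(x: Tensor): Tensor {
 *       return this.l2.forward(this.relu.forward(this.l1.forward(x)));
 *     }
 *   }
 */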
declare class Block extends Module {
  att: MultiHeadSelfAttention;
  ln1: LayerNorm;
  fcc: FullyConnected;
  ln2: LayerNorm;
  /**
   * Full transformer decoder block. Composed of a Multi Head Self-Attention layer, Fully Connected layers and Layer Norms.
   *
   * @param {number} in_size - Size of the last dimension of the input array.
   * @param {number} out_size - Size of the last dimension of the output array.
   * @param {number} n_heads - Number of parallel heads to be computed (must equally divide in_size).
   * @param {number} n_timesteps - Length of the text sequence to be processed by the Transformer.
   * @param {number} dropout_prob - Probability of zeroing each activation in the Dropout layer.
   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
   */
  constructor(in_size: number, out_size: number, n_heads: number, n_timesteps: number, dropout_prob?: number, device?: string);
  /**
   * Passes the "x" tensor through a full transformer Block.
   * @param {Tensor} x - Input Tensor.
   * @returns {Tensor} New Tensor.
   */
  forward(x: Tensor): Tensor;
}
declare class Embedding extends Module {
  E: Tensor;
  /**
   * Embedding class, turns indexes into vectors.
   *
   * @param {number} vocab_size - Number of different indexes (vocabulary size).
   * @param {number} embed_size - Size of the embedding vector generated.
   */
  constructor(vocab_size: number, embed_size: number);
  /**
   * Extracts the embeddings of the rows in "idx".
   * @param {Tensor} idx - Rows to get embeddings from.
   * @returns {Tensor} New Tensor with one embedding vector per index.
   */
  forward(idx: Tensor): Tensor;
}
declare class PositionalEmbedding extends Module {
  E: Tensor;
  /**
   * Embedding class, turns indexes into vectors based on their position, through an optimized lookup table.
   *
   * @param {number} input_size - Number of different embeddings (size of the input).
   * @param {number} embed_size - Size of the embedding vector generated.
   */
  constructor(input_size: number, embed_size: number);
  /**
   * Gets embeddings for the timesteps in the "idx" array.
   * @param {object} idx - Array [Batch x Timesteps]. Timesteps will be filled with positional embeddings.
   * @returns {Tensor} New Tensor.
   */
  forward(idx: Tensor): Tensor;
}
declare class ReLU extends Module {
  /**
   * Rectified Linear Unit nonlinearity. Returns z if z > 0, else 0.
   */
  constructor();
  /**
   * Performs forward pass through the Rectified Linear Unit nonlinearity. Returns z if z > 0, else 0.
   * @param {Tensor} z - Input Tensor.
   * @returns {Tensor} New Tensor.
   */
  forward(z: Tensor): Tensor;
}
declare class Softmax extends Module {
  /**
   * Softmax nonlinearity class. Returns a probability distribution over values (sums to 1).
   */
  constructor();
  /**
   * Performs forward pass through the Softmax nonlinearity.
   * @param {Tensor} z - Input Tensor.
   * @param {number} dim - Dimension across which to apply Softmax.
   * @returns {Tensor} New Tensor.
   */
  forward(z: Tensor, dim?: number): Tensor;
}
declare class Dropout extends Module {
  p: number;
  /**
   * Dropout class, usually added after other layers, to drop values to zero with a given probability.
   *
   * @param {number} drop_prob - Probability of dropping each value in the input.
   */
  constructor(drop_prob: number);
  /**
   * Performs forward pass through the Dropout layer. Sets random values to zero (this.p % of the total).
   * @param {Tensor} z - Input Tensor.
   * @returns {Tensor} New Tensor.
   */
  forward(z: Tensor): Tensor;
}
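/*
 * Usage sketch (illustrative): assembling the transformer components above into
 * a small decoder-only model. The hyperparameters and names are hypothetical;
 * residual connections are handled inside Block per its docs.
 *
 *   class TinyTransformer extends Module {
 *     tok: Embedding;
 *     pos: PositionalEmbedding;
 *     block: Block;
 *     head: Linear;
 *     constructor(vocab = 256, embed = 64, heads = 4, steps = 32) {
 *       super();
 *       this.tok = new Embedding(vocab, embed);
 *       this.pos = new PositionalEmbedding(steps, embed);
 *       this.block = new Block(embed, embed, heads, steps, 0.1);
 *       this.head = new Linear(embed, vocab);
 *     }
 *     forward(idx: Tensor): Tensor {
 *       const z = this.tok.forward(idx).add(this.pos.forward(idx));
 *       return this.head.forward(this.block.forward(z));
 *     }
 *   }
 */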
declare class LayerNorm extends Module {
  gamma: Tensor;
  beta: Tensor;
  /**
   * Layer Norm class, usually added after other layers to normalize across all of the output.
   *
   * @param {number} n_embed - Size of the last dimension of the input.
   */
  constructor(n_embed: number);
  /**
   * Performs forward pass through the Layer Norm.
   * @param {Tensor} x - Input Tensor.
   * @returns {Tensor} New Tensor.
   */
  forward(x: Tensor): Tensor;
}
declare class CrossEntropyLoss extends Module {
  /**
   * Cross Entropy Loss class, returns the loss given the output and the expected indexes.
   */
  constructor();
  /**
   * Performs forward pass through CrossEntropyLoss, returns the loss.
   * @param {Tensor} z - Output from the last layer of the network. Must have a shape like (*Batch dimensions, Number of possible classes).
   * @param {object} y - Correct indexes expected from the model.
   * @returns {Tensor} Negative-log-likelihood loss of the model output.
   */
  forward(z: Tensor, y: Tensor): Tensor;
}
/**
 * Mean Squared Error Loss class, returns the loss given the network output and the expected output.
 */
declare class MSELoss extends Module {
  /**
   * Constructor.
   */
  constructor();
  /**
   * Performs forward pass through MSELoss, returns the loss.
   * @param {Tensor} z - Output from the last layer of the network.
   * @param {object} y - Correct outputs expected from the model.
   * @returns {Tensor} Mean Squared Error loss of the model output.
   */
  forward(z: Tensor, y: Tensor): Tensor;
}
/**
 * Saves the model to a JSON file.
 * @param {Module} model - Model to be saved in the JSON file.
 * @param {string} file - JSON file.
 */
declare function save(model: Module, file: string): void;
/**
 * Loads a model from a JSON file.
 * @param {Module} model - Blank model to load the weights into (placeholder). Needs to have the same architecture as the saved model.
 * @param {string} file - JSON file.
 * @returns {Module} loadedModel - Model loaded from the JSON file.
 */
declare function load(model: Module, file: string): Module;
declare class Adam {
  params: (Parameter | Tensor)[];
  lr: number;
  reg: number;
  b1: number;
  b2: number;
  eps: number;
  /**
   * Adam optimizer class.
   * @param {(Parameter | Tensor)[]} params - List of all Parameters or Tensors (with requires_grad = true) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list.
   * @param {number} lr - Scalar multiplying each learning step, controls the speed of learning.
   * @param {number} reg - Scalar controlling the strength of the l2 regularization.
   * @param {(number)[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
   * @param {number} eps - Scalar added to the denominator to stop it from ever reaching zero.
   */
  constructor(params: (Parameter | Tensor)[], lr?: number, reg?: number, betas?: number[], eps?: number);
  /**
   * Updates all parameters in this.params with their gradients.
   */
  step(): void;
  /**
   * Sets all the gradients of this.params to zero.
   */
  zero_grad(): void;
}
type NestedArray<T> = T | NestedArray<T>[];
/**
 * Recursively gets the shape (length of every dimension) of the Tensor.
 * @param {object} data - Iterable containing the data to be stored in the Tensor.
 * @param {object} shape - Length of every dimension of the Tensor.
 * @returns {object} Length of every dimension of the Tensor.
 */
declare function getShape(data: NestedArray<any> | number, shape?: Array<any>): Array<any>;
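/*
 * Usage sketch (illustrative): a minimal training loop wiring a model together
 * with CrossEntropyLoss and the Adam optimizer above. `model`, `x` and `y` are
 * hypothetical placeholders for any Module and its input/target batches.
 *
 *   const lossFn = new CrossEntropyLoss();
 *   const optimizer = new Adam(model.parameters(), 3e-4, 0, [0.9, 0.99], 1e-9);
 *   for (let step = 0; step < 1000; step++) {
 *     const logits = model.forward(x);            // x: input batch
 *     const loss = lossFn.forward(logits, y);     // y: expected indexes
 *     loss.backward();                            // backprop through the graph
 *     optimizer.step();                           // apply the Adam update
 *     optimizer.zero_grad();                      // reset gradients for the next step
 *   }
 */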
declare const torch: {
  Tensor: typeof Tensor;
  Parameter: typeof Parameter;
  add: typeof add;
  neg: typeof neg;
  mul: typeof mul;
  div: typeof div;
  matmul: typeof matmul;
  exp: typeof exp;
  log: typeof log;
  sqrt: typeof sqrt;
  pow: typeof pow;
  mean: typeof mean;
  masked_fill: typeof masked_fill;
  variance: typeof variance;
  at: typeof at;
  reshape: typeof reshape;
  _reshape: typeof _reshape;
  transpose: typeof transpose;
  tensor: typeof tensor;
  randint: typeof randint;
  randn: typeof randn;
  rand: typeof rand;
  tril: typeof tril;
  ones: typeof ones;
  zeros: typeof zeros;
  broadcast: typeof broadcast;
  save: typeof save;
  load: typeof load;
  nn: {
    Module: typeof Module;
    Linear: typeof Linear;
    MultiHeadSelfAttention: typeof MultiHeadSelfAttention;
    FullyConnected: typeof FullyConnected;
    Block: typeof Block;
    Embedding: typeof Embedding;
    PositionalEmbedding: typeof PositionalEmbedding;
    ReLU: typeof ReLU;
    Softmax: typeof Softmax;
    Dropout: typeof Dropout;
    LayerNorm: typeof LayerNorm;
    CrossEntropyLoss: typeof CrossEntropyLoss;
    MSELoss: typeof MSELoss;
  };
  optim: {
    Adam: typeof Adam;
  };
  getShape: typeof getShape;
};
export { torch };