/** * @license * Copyright 2021, JsData. All rights reserved. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ========================================================================== */ import { convertToNumericTensor1D_2D, convertToNumericTensor2D } from '../utils' import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes, Tensor1D, Tensor2D, Tensor, ModelCompileArgs, ModelFitArgs } from '../types' import { OneHotEncoder } from '../preprocessing/OneHotEncoder' import { assert } from '../typesUtils' import { ClassifierMixin } from '../mixins' import { getBackend } from '../tf-singleton' /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can * create SGD solvers for LinearRegression, RidgeRegression, LassoRegression, * ElasticNet, LogisticRegression and many more. */ /** * Parameters for SGD */ export interface SGDClassifierParams { /** * The complete list of compile args for the `model.compile` call from tensorflow.js. * We aim to provide sensible defaults depending on the regressor / classifier. * An example call might look like * model.compile({ optimizer: train.adam(0.1), loss: losses.meanSquaredError, metrics: ['mse'], }) */ modelCompileArgs: ModelCompileArgs /** * The complete list of `model.fit` args from Tensorflow.js * We aim to provide sensible defaults depending on the regressor / classifier. * An example call might look like * model.fit( batchSize: 32, epochs: 1000, verbose: 0, callbacks: [callbacks.earlyStopping({ monitor: 'mse', patience: 50 })], }) */ modelFitArgs: ModelFitArgs /** * The arguments for a single dense layer in tensorflow. This also defaults to * different settings based on the regressor / classifier. An example dense layer * might look like. * const model = sequential() model.add( layers.dense({ inputShape: [100], units: 1, useBias: true, }) ) */ denseLayerArgs: any //DenseLayerArgs /** * This class specifies that we are building a linear model that uses SGD. But there still is the * question, "Is this model performing classification or regression"? This argument answers that * definitely. It's a boolean that is true when the model aims to perform classification */ isClassification?: boolean optimizerType: OptimizerTypes lossType: LossTypes } export class SGDClassifier extends ClassifierMixin { model: any //tf.Sequential modelFitArgs: ModelFitArgs modelCompileArgs: ModelCompileArgs denseLayerArgs: any //DenseLayerArgs optimizerType: OptimizerTypes lossType: LossTypes oneHot: OneHotEncoder tf: any isMultiOutput: boolean constructor({ modelFitArgs, modelCompileArgs, denseLayerArgs, optimizerType, lossType }: SGDClassifierParams) { super() this.tf = getBackend() this.model = this.tf.sequential() this.modelFitArgs = modelFitArgs this.modelCompileArgs = modelCompileArgs this.denseLayerArgs = denseLayerArgs this.optimizerType = optimizerType this.lossType = lossType this.isMultiOutput = false // Next steps: Implement "drop" mechanics for OneHotEncoder // There is a possibility to do a drop => if_binary which would // squash down on the number of variables that we'd have to learn this.oneHot = new OneHotEncoder() } initializeModelForClassification(y: Tensor1D | Tensor2D): Tensor2D { let yToInt = y.toInt() // This covers the case of a dependent variable that is already one hot encoded. // There are other cases where you do "multi-variable output which isn't one hot encoded" // Like say you were predicting which diseases a person could have (hasCancer, hasMeningitis, etc) // Then you would have to run a sigmoid on each independent variable if (yToInt.shape.length === 2) { this.modelCompileArgs.loss = this.tf.losses.softmaxCrossEntropy return yToInt as Tensor2D } else { const yTwoD = y.reshape([-1, 1]) as Tensor2D const yTwoDOneHotEncoded = this.oneHot.fitTransform(yTwoD) if (this.oneHot.categories[0].length > 2) { this.modelCompileArgs.loss = this.tf.losses.softmaxCrossEntropy } else { this.modelCompileArgs.loss = this.tf.losses.sigmoidCrossEntropy } return yTwoDOneHotEncoded } } /** * Creates the tensorflow model. Because the model contains only * one dense layer, we must pass the inputShape to that layer. * That inputShape is only known at "runtime" ie... when we call `fit(X, y)` * that first time. The inputShape is effectively `X.shape[1]` * * This function runs after that first call to fit or when pass in modelWeights. * That can come up if we train a model in python, and simply want to copy over the * weights to this JS version so we can deploy on browsers / phones. * @returns {void} */ initializeModel( X: Tensor2D, y: Tensor1D | Tensor2D, weightsTensors: Tensor[] = [] ): void { this.denseLayerArgs.units = y.shape.length === 1 ? 1 : y.shape[1] const model = this.tf.sequential() model.add( this.tf.layers.dense({ inputShape: [X.shape[1]], ...this.denseLayerArgs }) ) model.compile(this.modelCompileArgs) if (weightsTensors?.length) { model.setWeights(weightsTensors) } this.model = model } /** * Similar to scikit-learn, this trains a model to predict y, from X. * Even in the case where we predict a single output vector, * the predictions are a 2D matrix (albeit a single column in a 2D Matrix). * * This is to facilitate the case where we predict multiple targets, or in the case * of classification where we are predicting a 2D Matrix of probability class labels. * @param {Scikit2D} X The 2DTensor / 2D Array that you wish to use as a training matrix * @param {ScikitVecOrMatrix} y Either 1D or 2D array / Tensor that you wish to predict * * @returns {Promise} Returns the predictions. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * await lr.fit(X, y); * // lr model weights have been updated */ public async fit( X: Scikit2D, y: Scikit1D | Scikit2D ): Promise { let XTwoD = convertToNumericTensor2D(X) let yOneD = convertToNumericTensor1D_2D(y) const yTwoD = this.initializeModelForClassification(yOneD) if (yOneD.shape.length > 1) { this.isMultiOutput = true } if (this.model.layers.length === 0) { this.initializeModel(XTwoD, yTwoD) } await this.model.fit(XTwoD, yTwoD, { ...this.modelFitArgs }) return this } /** * This aims to be a bridge to scikit-learn Estimators, where users can train * models over in scikit-learn and then ship the coefficients into the proper * Estimator on the Scikit.js side. This can be useful if the python version is faster * to train, but we still need a JS version because we wish to ship to mobile or browsers. * * @param {{ coef: number[]; intercept: number }} params The object that contains the model parameters, * coef, and intercept that we need for our model. * * @returns {SGD} Returns the predictions. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * lr.importModel({coef : [1.2, 2.3], intercept: 10.0}); * // lr model weights have been updated */ importModel(params: { coef: number[]; intercept: number }): SGDClassifier { // Next steps: Need to update for possible 2D coef case, and 1D intercept case let myCoef = this.tf.tensor2d( params.coef, [params.coef.length, 1], 'float32' ) let myIntercept = this.tf.tensor1d([params.intercept], 'float32') this.initializeModel(myCoef, myIntercept, [myCoef, myIntercept]) return this } /** * Similar to scikit-learn, this returns the object of configuration params for SGD * @returns {SGDClassifierParams} Returns an object of configuration params. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * lr.getParams() * // => { modelCompileArgs: { optimizer: train.adam(0.1), loss: losses.meanSquaredError, metrics: ['mse'], }, modelFitArgs: { batchSize: 32, epochs: 1000, verbose: 0, callbacks: [callbacks.earlyStopping({ monitor: 'mse', patience: 50 })], }, denseLayerArgs: { units: 1, useBias: true, } } */ getParams(): SGDClassifierParams { return { modelFitArgs: this.modelFitArgs, modelCompileArgs: this.modelCompileArgs, denseLayerArgs: this.denseLayerArgs, optimizerType: this.optimizerType, lossType: this.lossType } } /** * Similar to scikit-learn, this returns the object of configuration params for SGD * @returns {SGDClassifierParams} Returns an object of configuration params. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * lr.setParams({ modelFitArgs: { batchSize: 100, epochs: -1, verbose: 1, }) */ setParams(params: SGDClassifierParams): SGDClassifier { this.modelCompileArgs = params.modelCompileArgs this.modelFitArgs = params.modelFitArgs this.denseLayerArgs = params.denseLayerArgs return this } public predictProba(X: Scikit2D): Tensor2D { assert(this.model.layers.length > 0, 'Need to call "fit" before "predict"') let XTwoD = convertToNumericTensor2D(X) return this.model.predict(XTwoD) as Tensor2D } /** * Similar to scikit-learn, this returns a Tensor2D (2D Matrix) of predictions. * Even in the case where we predict a single output vector, * the predictions are a 2D matrix (albeit a single column in a 2D Matrix). * * This is to facilitate the case where we predict multiple targets, or in the case * of classification where we are predicting a 2D Matrix of probability class labels. * @param {Scikit2D} X The 2DTensor / 2D Array that you wish to run through * your model and make predictions. * * @returns {Tensor2D} Returns the predictions. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * await lr.fit(X, y); * lr.predict(X) * // => tensor2d([[ 4.5, 10.3, 19.1, 0.22 ]]) */ public predict(X: Scikit2D): Tensor1D { assert(this.model.layers.length > 0, 'Need to call "fit" before "predict"') const y2D = this.predictProba(X) if (this.isMultiOutput) { return this.tf.oneHot(y2D.argMax(1), y2D.shape[1]) } return this.tf.tensor1d(this.oneHot.inverseTransform(y2D)) } /** * Similar to scikit-learn, this returns the coefficients of our linear model. * The return type is a 1D matrix (technically a Tensor1D) if we predict a single output. * It's a 2D matrix (Tensor2D) if we predict a regression task with multiple outputs or * a classification task with multiple class labels. * @returns {Tensor1D | Tensor2D} Returns the coefficients. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * await lr.fit(X, [1,2,3]); * lr.coef * // => tensor1d([[ 1.2, 3.3, 1.1, 0.2 ]]) * * await lr.fit(X, [ [1,2], [3,4], [5,6] ]); * lr.coef * // => tensor2d([ [1.2, 3.3], [3.4, 5.6], [4.5, 6.7] ]) */ get coef(): Tensor1D | Tensor2D { const modelWeights = this.model.getWeights() if (modelWeights.length === 0) { return this.tf.tensor2d([]) } let coefficients = modelWeights[0] if (coefficients.shape[1] === 1) { return coefficients.reshape([coefficients.shape[0]]) as Tensor1D } return coefficients as Tensor2D } /** * Similar to scikit-learn, this returns the intercept of our linear model. * The return type is always a Tensor1D (a vector). * Normally we'd just return a single number but in the case * of multiple regression (multiple output targets) we'd need * a vector to store all the intercepts, * @returns {number | Tensor1D} Returns the intercept. * * We use a LinearRegression in the example below because it provides * defaults for the SGD * * @example * * lr = new LinearRegression() * await lr.fit(X, [1,2,3]); * lr.intercept * // => 4.5 * * * lr = new LinearRegression() * await lr.fit(X, [ [1,2,3], [4,5,6] ]); * lr.intercept * // => tensor1d([1.2, 2.3]) */ get intercept(): number | Tensor1D { const modelWeights = this.model.getWeights() if (modelWeights.length < 2) { return 0.0 } let intercept = modelWeights[1] as Tensor1D if (intercept.size === 1) { return intercept.arraySync()[0] } return intercept } }