/**
 * @license
 * Copyright 2021, JsData. All rights reserved.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==========================================================================
 */
import { Scikit1D, Scikit2D, Tensor1D, Tensor2D } from '../types';
import { TransformerMixin } from '../mixins';

/** Constructor options accepted by {@link OneHotEncoder}. */
export interface OneHotEncoderParams {
  /**
   * Categories (unique values) per feature:
   * - `'auto'`: determine categories automatically from the training data.
   * - list: `categories[i]` holds the categories expected in the ith column.
   *   The passed categories should not mix strings and numeric values, and
   *   should be sorted in the case of numeric values.
   *
   * **default = "auto"**
   */
  categories?: 'auto' | (number | string | boolean)[][];
  /**
   * When set to `'error'`, an error is raised if an unknown categorical
   * feature is present during transform. When set to `'ignore'`, the encoded
   * columns for that feature will be all zeros.
   * In `inverseTransform`, an unknown category will be denoted as null.
   *
   * **default = "error"**
   */
  handleUnknown?: 'error' | 'ignore';
  /**
   * Specifies a methodology to use to drop one of the categories per feature.
   * This is useful in situations where perfectly collinear features cause
   * problems, such as when feeding the resulting data into a neural network or
   * an unregularized regression. However, dropping one category breaks the
   * symmetry of the original representation and can therefore induce a bias in
   * downstream models, for instance for penalized linear classification or
   * regression models.
   *
   * Options:
   * - `undefined`: retain all features (the default).
   * - `'first'`: drop the first category in each feature. If only one category
   *   is present, the feature will be dropped entirely.
   *
   * **default = undefined**
   */
  drop?: 'first';
}

/**
 * Encodes categorical features as one-hot columns, one group of columns per
 * input feature.
 *
 * @example
 * ```js
 * import { OneHotEncoder } from 'scikitjs'
 *
 * const X = [
 *   ['Male', 1],
 *   ['Female', 2],
 *   ['Male', 4]
 * ]
 * const encode = new OneHotEncoder()
 * encode.fitTransform(X) // returns the object below
 * const expected = [
 *   [1, 0, 1, 0, 0],
 *   [0, 1, 0, 1, 0],
 *   [1, 0, 0, 0, 1]
 * ]
 * ```
 */
export declare class OneHotEncoder extends TransformerMixin {
  /** categories is a list of unique labels per feature */
  categories: (number | string | boolean)[][];
  /** How unknown categories are treated during transform; see {@link OneHotEncoderParams.handleUnknown}. */
  handleUnknown?: 'error' | 'ignore';
  /**
   * This holds the categories parameter that is passed in the constructor.
   * `this.categories` holds the actual learned categories or the ones passed
   * in from the constructor.
   */
  categoriesParam: 'auto' | (number | string | boolean)[][];
  /** Which category, if any, is dropped per feature; see {@link OneHotEncoderParams.drop}. */
  drop?: 'first';
  /** The number of features seen during fit */
  nFeaturesIn: number;
  /** Names of features seen during fit. Only stores feature names if input is a DataFrame */
  featureNamesIn: Array<string>;
  /** Useful for pipelines and column transformers to have a default name for transforms */
  name: string;

  constructor({ categories, handleUnknown, drop }?: OneHotEncoderParams);

  /**
   * Builds a lookup from each class label to a number.
   * NOTE(review): presumably the number is the label's position in `classes`
   * — confirm against the implementation.
   * @param classes The labels of a single feature.
   * @returns A map keyed by label.
   */
  classesToMapping(
    classes: Array<number | string | boolean>
  ): Map<number | string | boolean, number>;

  /**
   * NOTE(review): judging by the name, this walks a 2D array and records the
   * labels found per column — confirm against the implementation.
   */
  loopOver2DArrayToSetLabels(array2D: any): void;

  /**
   * Fits a OneHotEncoder to the data.
   * @param X 2D array, Tensor, or DataFrame of categorical features to learn from.
   * @param y Optional second argument; NOTE(review): presumably unused and
   * accepted only for API symmetry — confirm.
   * @returns OneHotEncoder
   * @example
   * ```js
   * const encoder = new OneHotEncoder()
   * encoder.fit([["a"], ["b"], ["c"]])
   * ```
   */
  fit(X: Scikit2D, y?: Scikit1D): OneHotEncoder;

  /**
   * NOTE(review): judging by the name and return type, this converts each
   * entry of a 2D array into a numeric code using the stored labels — confirm
   * against the implementation.
   */
  loopOver2DArrayToUseLabels(array2D: any): number[][];

  /**
   * Generalization of the tf.oneHot that can handle "one-hotting" with a
   * single column output.
   */
  convertToOneHot(tensor: Tensor1D, numberOfOneHotColumns: number): Tensor2D;

  /**
   * Encodes the data using the fitted OneHotEncoder.
   * @param X 2D array, Tensor, or DataFrame of categorical features to encode.
   * @param y Optional second argument; NOTE(review): presumably unused and
   * accepted only for API symmetry — confirm.
   * @returns The one-hot encoded data as a 2D tensor.
   * @example
   * ```js
   * const encoder = new OneHotEncoder()
   * encoder.fit([["a"], ["b"], ["c"]])
   * encoder.transform([["a"], ["b"], ["c"]])
   * ```
   */
  transform(X: Scikit2D, y?: Scikit1D): Tensor2D;

  /** Only works for single column OneHotEncoding */
  inverseTransform(X: Tensor2D): any[];
}