import { MemoryData } from '@rapidsai/cuda'; import { DeviceBuffer, MemoryResource } from '@rapidsai/rmm'; import * as arrow from 'apache-arrow'; import { Column } from './column'; import { ColumnAccessor } from './column_accessor'; import { JoinResult } from './dataframe/join'; import { DisplayOptions } from './dataframe/print'; import { GroupByMultiple, GroupByMultipleProps, GroupBySingle, GroupBySingleProps } from './groupby'; import { Series } from './series'; import { Table } from './table'; import { ReadCSVOptions, ReadCSVOptionsCommon, WriteCSVOptions } from './types/csv'; import { Bool8, DataType, FloatingPoint, IndexType, Int32, Integral, List, Numeric } from './types/dtypes'; import { DuplicateKeepOption, NullOrder } from './types/enums'; import { ColumnsMap, CommonType, TypeMap } from './types/mappings'; import { ReadORCOptions, ReadORCOptionsCommon, WriteORCOptions } from './types/orc'; import { ReadParquetOptions, ReadParquetOptionsCommon, WriteParquetOptions } from './types/parquet'; /** NOTE(review): the generic parameter lists in this declaration file appear to have been stripped - names such as `T`, `P`, `R`, `TOn`, `How`, `LSuffix` and `RSuffix` are referenced below but never declared, and utility types such as `Exclude`, `Omit` and `NonNullable` are used without type arguments. Regenerate this file from its source before relying on it. */ /** Maps each key of `T` to an object exposing a readonly `type` property of type `T[P]`. */ export declare type SeriesMap = { [P in keyof T]: { readonly type: T[P]; }; }; /** Per-column sort specification: an optional sort direction and an optional null placement (a key of `NullOrder`). */ export declare type OrderSpec = { ascending?: boolean; null_order?: keyof typeof NullOrder; }; /** The supported join strategies. */ declare type JoinType = 'inner' | 'outer' | 'left' | 'right' | 'leftsemi' | 'leftanti'; /** Configuration for `DataFrame.join`: the other DataFrame, the column names to join on, and optional join type, left/right name suffixes, null-equality flag and MemoryResource. */ declare type JoinProps = { other: DataFrame; on: TOn[]; how?: How; lsuffix?: LSuffix; rsuffix?: RSuffix; nullEquality?: boolean; memoryResource?: MemoryResource; }; /** * A GPU Dataframe object. */ export declare class DataFrame { /** * Construct a DataFrame from a Table and list of column names. 
* * @param table The cudf.Table instance * @param names List of string Column names * @returns A DataFrame built from `table`, with its columns named by `names` */ static fromTable(table: Table, names: readonly (string & keyof T)[]): DataFrame; /** * Read a CSV file from disk and create a cudf.DataFrame * * @param path Path of the CSV file to read * @param options Optional settings controlling how the CSV is read * * @example * ```typescript * import * as cudf from '@rapidsai/cudf'; * const df = cudf.DataFrame.readCSV('test.csv', { * header: 0, * dataTypes: { * a: new cudf.Int16, * b: new cudf.Bool8, * c: new cudf.Float32, * d: new cudf.Utf8String * } * }) * ``` */ static readCSV(path: string, options?: ReadCSVOptionsCommon): DataFrame; /** * Read a CSV file from disk and create a cudf.DataFrame * * @param options Settings describing the sources to read and how to read them * * @example * ```typescript * import {DataFrame, Series, Int16, Bool8, Float32, Utf8String} from '@rapidsai/cudf'; * const df = DataFrame.readCSV({ * header: 0, * sourceType: 'files', * sources: ['test.csv'], * dataTypes: { * a: new Int16, * b: new Bool8, * c: new Float32, * d: new Utf8String * } * }) * ``` */ static readCSV(options: ReadCSVOptions): DataFrame; /** * Read Apache ORC files from disk and create a cudf.DataFrame * * @param paths A single file path or a list of file paths to read * @param options Optional settings controlling how the ORC files are read * * @example * ```typescript * import {DataFrame} from '@rapidsai/cudf'; * const df = DataFrame.readORC('test.orc', { * skipRows: 10, numRows: 10, * }) * ``` */ static readORC(paths: string | (string[]), options?: ReadORCOptionsCommon): DataFrame; /** * Read Apache ORC files from disk and create a cudf.DataFrame * * @param options Settings describing the sources to read and how to read them * * @example * ```typescript * import {DataFrame} from '@rapidsai/cudf'; * const df = DataFrame.readORC({ * sourceType: 'files', * sources: ['test.orc'], * }) * ``` */ static readORC(options: ReadORCOptions): DataFrame; /** * Read Apache Parquet files from disk and create a cudf.DataFrame * * @param paths A single file path or a list of file paths to read * @param options Optional settings controlling how the Parquet files are read * * @example * ```typescript * import {DataFrame} from '@rapidsai/cudf'; * const df = DataFrame.readParquet('test.parquet', { * skipRows: 10, numRows: 10, * }) * ``` */ static readParquet(paths: string | (string[]), options?: ReadParquetOptionsCommon): DataFrame; /** * Read Apache Parquet files from disk and create a cudf.DataFrame * * @param options Settings describing the sources to read and how to read them * * @example * 
```typescript * import {DataFrame} from '@rapidsai/cudf'; * const df = DataFrame.readParquet({ * sourceType: 'files', * sources: ['test.parquet'], * }) * ``` */ static readParquet(options: ReadParquetOptions): DataFrame; /** * Adapts an Arrow Table in IPC format into a DataFrame. * * @param memory A DeviceBuffer or MemoryData holding the Arrow table in IPC format * @returns The Arrow data as a DataFrame */ static fromArrow(memory: DeviceBuffer | MemoryData): DataFrame; private _accessor; /** * Create a new cudf.DataFrame * * @param data Optional source of the columns: a SeriesMap, a ColumnsMap, or a ColumnAccessor * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([1, 2]), * b: Series.new([true, false]), * c: Series.new(["foo", "bar"]) * }) * * ``` */ constructor(data?: SeriesMap); constructor(data?: ColumnsMap); constructor(data?: ColumnAccessor); /** * The number of rows in each column of this DataFrame * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([1, 2]), * b: Series.new([1, 2]), * c: Series.new([1, 2]) * }) * * df.numRows // 2 * ``` */ get numRows(): number; /** * The number of columns in this DataFrame * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([1, 2]), * b: Series.new([1, 2]), * c: Series.new([1, 2]) * }) * * df.numColumns // 3 * ``` */ get numColumns(): number; /** * The names of the columns in this DataFrame * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([1, 2]), * b: Series.new([1, 2]), * c: Series.new([1, 2]) * }) * * df.names // ['a', 'b', 'c'] * ``` */ get names(): readonly (string & keyof T)[]; /** * A map of this DataFrame's Series names to their DataTypes * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([1, 2]), * b: Series.new(["foo", "bar"]), * c: Series.new([[1, 2], [3]]), * }) * * df.types * // 
{ * // a: [Object Float64], * // b: [Object Utf8String], * // c: [Object List] * // } * ``` */ get types(): T; /** @ignore */ asTable(): Table; /** @ignore */ asStruct(): import("./series").StructSeries; /** * Return a string with a tabular representation of the DataFrame, pretty-printed according to the * options given. * * @param options Optional display options controlling how the DataFrame is printed * @returns The formatted string */ toString(options?: DisplayOptions): string; /** * Return a new DataFrame containing only the specified columns. * * @param names Names of the columns to keep. * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, 1, 1, 2, 2, 2]), * b: Series.new([0, 1, 2, 3, 4, 4]), * c: Series.new([1, 2, 3, 4, 5, 6]) * }) * * df.select(['a', 'b']) // returns df with {a, b} * ``` */ select(names: readonly R[]): DataFrame<{ [P in R]: T[P]; }>; /** * Return a new DataFrame with new columns added. In the result type, a name present in both * this DataFrame and `data` takes its column type from `data`. * * @param {SeriesMap|DataFrame} data mapping of names to new columns to add, or a GPU * DataFrame object * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({a: [1, 2, 3]}); * * df.assign({b: Series.new(["foo", "bar", "bar"])}) * // returns df {a: [1, 2, 3], b: ["foo", "bar", "bar"]} * ``` * * @example * ```typescript * import {DataFrame} from '@rapidsai/cudf'; * * const df = new DataFrame({a: [1, 2, 3]}); * const df1 = new DataFrame({b: ["foo", "bar", "bar"]}); * * df.assign(df1) // returns df {a: [1, 2, 3], b: ["foo", "bar", "bar"]} * ``` */ assign(data: SeriesMap | DataFrame): DataFrame<{ [P in keyof (T & R)]: P extends keyof R ? R[P] : P extends keyof T ? T[P] : never; }>; /** * Return a new DataFrame with specified columns removed. * * @param names Names of the columns to drop. 
* * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 1, 2, 2, 2]}), * b: Series.new({type: new Float32, data: [0, 1, 2, 3, 4, 4]}) * }); * * df.drop(['a']) // returns df {b: [0, 1, 2, 3, 4, 4]} * ``` */ drop(names: readonly R[]): DataFrame<{ [P in Exclude]: T[P]; }>; /** * Return a new DataFrame with specified columns renamed. * * @param nameMap Object mapping old to new Column names. * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 1, 2, 2, 2]}), * b: Series.new({type: new Float32, data: [0, 1, 2, 3, 4, 4]}) * }); * * df.rename({a: 'c'}) // returns df {b: [0, 1, 2, 3, 4, 4], c: [0, 1, 1, 2, 2, 2]} * ``` */ rename(nameMap: P): DataFrame<{ [P_2 in keyof ({ [P_1 in Exclude]: T[P_1]; } & { [K in keyof P as `${NonNullable}`]: T[string & K]; })]: P_2 extends keyof { [K in keyof P as `${NonNullable}`]: T[string & K]; } ? { [K in keyof P as `${NonNullable}`]: T[string & K]; }[P_2] : P_2 extends Exclude ? { [P_1 in Exclude]: T[P_1]; }[P_2] : never; }>; /** * Return whether the DataFrame has a Series with the given name. * * @param name Name of the Series to look for. * @returns `true` if a Series with that name exists, otherwise `false` * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 1, 2, 2, 2]}), * b: Series.new({type: new Float32, data: [0, 1, 2, 3, 4, 4]}) * }); * * df.has('a') // true * df.has('c') // false * ``` */ has(name: string): boolean; /** * Return a series by name. * * @param name Name of the Series to return. 
* @returns The Series stored under `name` * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 1, 2, 2, 2]}), * b: Series.new({type: new Float32, data: [0, 1, 2, 3, 4, 4]}) * }); * * df.get('a') // Int32Series * df.get('b') // Float32Series * ``` */ get

(name: P): Series; /** * Casts each selected Series in this DataFrame to a new dtype (similar to `static_cast` in C++). * * @param dataTypes The map from column names to new dtypes. * @param memoryResource The optional MemoryResource used to allocate the result Series's device * memory. * @returns DataFrame of Series cast to the new dtype * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 1, 2, 2, 2]}), * b: Series.new({type: new Int32, data: [0, 1, 2, 3, 4, 4]}) * }); * * df.cast({a: new Float32}); // returns df with a as Float32Series and b as Int32Series * ``` */ cast(dataTypes: R, memoryResource?: MemoryResource): DataFrame<{ [P in keyof (Omit & R)]: (Omit & R)[P]; }>; /** * Casts all the Series in this DataFrame to a new dtype (similar to `static_cast` in C++). * * @param dataType The new dtype. * @param memoryResource The optional MemoryResource used to allocate the result Series's device * memory. * @returns DataFrame of Series cast to the new dtype * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 1, 2, 2, 2]}), * b: Series.new({type: new Int32, data: [0, 1, 2, 3, 4, 4]}) * }) * * df.castAll(new Float32); // returns df with a and b as Float32Series * ``` */ castAll(dataType: R, memoryResource?: MemoryResource): DataFrame<{ [P in keyof T]: R; }>; /** * Concat DataFrame(s) to the end of the caller, returning a new DataFrame. * * @param others The DataFrame(s) to concat to the end of the caller. 
* * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([1, 2, 3, 4]), * b: Series.new([1, 2, 3, 4]), * }); * * const df2 = new DataFrame({ * a: Series.new([5, 6, 7, 8]), * }); * * // `b` is absent from df2, so its rows are null-filled in the result * df.concat(df2); * // return { * // a: [1, 2, 3, 4, 5, 6, 7, 8], * // b: [1, 2, 3, 4, null, null, null, null], * // } * ``` */ concat(...others: U): import("./dataframe/concat").ConcatTypeMap[keyof import("./dataframe/concat").ConcatTypeMap] extends never ? never : DataFrame<{ [P in keyof import("./dataframe/concat").ConcatTypeMap]: import("./dataframe/concat").ConcatTypeMap[P]; }>; /** * @summary Explicitly free the device memory associated with this DataFrame. */ dispose(): void; /** * @summary Flatten the elements of this DataFrame's list columns, duplicating the corresponding * rows for other columns in this DataFrame. * * @param {string[]} names Names of List Columns to flatten. Defaults to all list Columns. * @param {boolean} [includeNulls=true] Whether to retain null entries and map empty lists to * null. * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A new DataFrame in which each flattened List column has the List's value type. */ flatten(names?: readonly R[], includeNulls?: boolean, memoryResource?: MemoryResource): DataFrame<{ [P in keyof T | R]: P extends R ? T[P] extends List ? T[P] extends List ? T[P]["valueType"] : T[P] : T[P] : T[P]; }>; /** * @summary Flatten the elements of this DataFrame's list columns into their positions in its * original list, duplicating the corresponding rows for other columns in this DataFrame. * * @param {string[]} names Names of List Columns to flatten. Defaults to all list Columns. * @param {boolean} [includeNulls=true] Whether to retain null entries and map empty lists to * null. * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A new DataFrame in which each flattened List column is typed as Int32. 
*/ flattenIndices(names?: readonly R[], includeNulls?: boolean, memoryResource?: MemoryResource): DataFrame<{ [P in keyof T | R]: P extends R ? T[P] extends List ? Int32 : T[P] : T[P]; }>; /** * @summary Interleave columns of a DataFrame into a single Series. * * @param dataType The dtype of the result Series (required if the DataFrame has mixed dtypes). * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * * @returns Series representing a packed row-major matrix of all the source DataFrame's Series. * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * new DataFrame({ * a: Series.new([1, 2, 3]), * b: Series.new([4, 5, 6]), * }).interleaveColumns() * // Float64Series [ * // 1, 4, 2, 5, 3, 6 * // ] * * new DataFrame({ * b: Series.new([ [0, 1, 2], [3, 4, 5], [6, 7, 8]]), * c: Series.new([[10, 11, 12], [13, 14, 15], [16, 17, 18]]), * }).interleaveColumns() * // ListSeries [ * // [0, 1, 2], * // [10, 11, 12], * // [3, 4, 5], * // [13, 14, 15], * // [6, 7, 8], * // [16, 17, 18], * // ] * ``` */ interleaveColumns(dataType?: R | null, memoryResource?: MemoryResource): Series; /** * Generate an ordering that sorts DataFrame columns in a specified way * * @param options mapping of column names to sort order specifications * @param memoryResource An optional MemoryResource used to allocate the result's device memory. 
* * @returns Series containing the permutation indices for the desired sort order * * @example * ```typescript * import {DataFrame, Series, Int32, NullOrder} from '@rapidsai/cudf'; * const df = new DataFrame({a: Series.new([null, 4, 3, 2, 1, 0])}); * * df.orderBy({a: {ascending: true, null_order: 'before'}}); * // Int32Series [0, 5, 4, 3, 2, 1] * * df.orderBy({a: {ascending: true, null_order: 'after'}}); * // Int32Series [5, 4, 3, 2, 1, 0] * * df.orderBy({a: {ascending: false, null_order: 'before'}}); * // Int32Series [1, 2, 3, 4, 5, 0] * * df.orderBy({a: {ascending: false, null_order: 'after'}}); * // Int32Series [0, 1, 2, 3, 4, 5] * ``` */ orderBy(options: { [P in R]: OrderSpec; }, memoryResource?: MemoryResource): import("./series").Int32Series; /** * Generate a new DataFrame sorted in the specified way. * * @param options mapping of column names to sort order specifications. Each specification may * set `ascending` (sort ascending when true, descending when false; default: true) and * `null_order` (whether nulls sort 'before' or 'after' other values; default: after). * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * * @returns A new DataFrame of sorted values * * NOTE(review): for `ascending: false`, the null placement shown in the examples below is the * opposite of what the `orderBy` examples show for the same `null_order` - confirm which is * correct. * * @example * ```typescript * import {DataFrame, Series, Int32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([null, 4, 3, 2, 1, 0]), * b: Series.new([0, 1, 2, 3, 4, 5]) * }); * * df.sortValues({a: {ascending: true, null_order: 'after'}}) * // {a: [0, 1, 2, 3, 4, null], b: [5, 4, 3, 2, 1, 0]} * * df.sortValues({a: {ascending: true, null_order: 'before'}}) * // {a: [null, 0, 1, 2, 3, 4], b: [0, 5, 4, 3, 2, 1]} * * df.sortValues({a: {ascending: false, null_order: 'after'}}) * // {a: [4, 3, 2, 1, 0, null], b: [1, 2, 3, 4, 5, 0]} * * df.sortValues({a: {ascending: false, null_order: 'before'}}) * // {a: [null, 4, 3, 2, 1, 0], b: [0, 1, 2, 3, 4, 5]} * ``` */ sortValues(options: { [P in R]: OrderSpec; }, memoryResource?: MemoryResource): DataFrame; /** * @summary Return sub-selection from a DataFrame using the specified integral indices. 
* * @description Gathers the rows of the source columns according to `selection`, such that row "i" * in the resulting Table's columns will contain row `selection[i]` from the source columns. The * number of rows in the result table will be equal to the number of elements in selection. A * negative value i in the selection is interpreted as i+n, where `n` is the number of rows in * the source table. * * For dictionary columns, the keys column component is copied and not trimmed if the gather * results in abandoned key elements. * * @param selection A Series of 8/16/32-bit signed or unsigned integer indices to gather. * @param nullify_out_of_bounds If `true`, coerce rows that correspond to out-of-bounds indices * in the selection to null. If `false`, skips all bounds checking for selection values. Pass * false if you are certain that the selection contains only valid indices for better * performance. If `false` and there are out-of-bounds indices in the selection, the behavior * is undefined. Defaults to `false`. * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A new DataFrame containing the gathered rows * * @example * ```typescript * import {DataFrame, Series, Int32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 2, 3, 4, 5]}), * b: Series.new([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) * }); * * const selection = Series.new({type: new Int32, data: [2,4,5]}); * * df.gather(selection); // {a: [2, 4, 5], b: [2.0, 4.0, 5.0]} * ``` */ gather(selection: Series, nullify_out_of_bounds?: boolean, memoryResource?: MemoryResource): DataFrame; /** * Returns the first n rows as a new DataFrame. * * @param n The number of rows to return. 
* * @example * ```typescript * import {DataFrame, Series, Int32} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 2, 3, 4, 5, 6]}), * b: Series.new([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) * }); * * df.head(); * // {a: [0, 1, 2, 3, 4], b: [0.0, 1.0, 2.0, 3.0, 4.0]} * * df.head(1); * // {a: [0], b: [0.0]} * * df.head(-1); * // throws index out of bounds error * ``` */ head(n?: number): DataFrame; /** * Returns the last n rows as a new DataFrame. * * @param n The number of rows to return. * * @example * ```typescript * import {DataFrame, Series, Int32} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 2, 3, 4, 5, 6]}), * b: Series.new([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) * }); * * df.tail(); * // {a: [2, 3, 4, 5, 6], b: [2.0, 3.0, 4.0, 5.0, 6.0]} * * df.tail(1); * // {a: [6], b: [6.0]} * * df.tail(-1); * // throws index out of bounds error * ``` */ tail(n?: number): DataFrame; /** * Return a group-by on a single column. * * @param props configuration for the groupby * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, 1, 1, 2, 2, 2]), * b: Series.new([0, 1, 2, 3, 4, 4]), * c: Series.new([1, 2, 3, 4, 5, 6]) * }) * * df.groupBy({by: 'a'}).max() // { a: [2, 1, 0], b: [4, 2, 0], c: [6, 3, 1] } * * ``` */ groupBy(props: GroupBySingleProps): GroupBySingle; /** * Return a group-by on multiple columns. 
* * @param props configuration for the groupby * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, 1, 1, 2, 2, 2]), * b: Series.new([0, 1, 2, 3, 4, 4]), * c: Series.new([1, 2, 3, 4, 5, 6]) * }) * * df.groupBy({by: ['a', 'b']}).max() * // { * // "a_b": [{"a": [2, 1, 1, 2, 0], "b": [4, 2, 1, 3, 0]}], * // "c": [6, 3, 2, 4, 1] * // } * * ``` */ groupBy(props: GroupByMultipleProps): GroupByMultiple; /** * Return sub-selection from a DataFrame from the specified boolean mask. * * @param mask A boolean Series; in the example below, the rows whose mask entry is set are * the ones kept. * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * * @example * ```typescript * import {DataFrame, Series, Bool8} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, 1, 2, 3, 4, 4]), * b: Series.new([0, NaN, 2, 3, 4, 4]) * }) * const mask = Series.new({type: new Bool8, data: [0, 0, 1, 0, 1, 1]}) * * df.filter(mask); // {a: [2, 4, 4], b: [2, 4, 4]} * * ``` */ filter(mask: Series, memoryResource?: MemoryResource): DataFrame; /** * Join columns with other DataFrame. * * @param props the configuration for the join * @returns the joined DataFrame */ join(props: JoinProps): DataFrame<{ [P in keyof JoinResult]: P extends TOn ? CommonType : JoinResult[P]; }>; /** * Join columns with other DataFrame. * * @param props the configuration for the join * @returns the joined DataFrame */ join(props: JoinProps): DataFrame; /** * Serialize this DataFrame to CSV format. * * @param options Options controlling CSV writing behavior. * * @returns An AsyncIterable of the CSV data (described in earlier docs as a node * ReadableStream). */ toCSV(options?: WriteCSVOptions): AsyncIterable; /** * Write a DataFrame to ORC format. * * @param filePath File path or root directory path. * @param options Options controlling ORC writing behavior. * */ toORC(filePath: string, options?: WriteORCOptions): void; /** * Write a DataFrame to Parquet format. * * @param filePath File path or root directory path. * @param options Options controlling Parquet writing behavior. 
* */ toParquet(filePath: string, options?: WriteParquetOptions): void; /** * Copy this DataFrame to an Arrow Table in host memory * * @returns An Arrow Table holding this DataFrame's data * * @example * ```typescript * import {DataFrame, Series} from "@rapidsai/cudf"; * * const df = new DataFrame({a: Series.new([0,1,2]), b: Series.new(["one", "two", "three"])}); * * const arrow_df = df.toArrow(); // Arrow table * * arrow_df.toArray(); * // [ * // { "a": 0, "b": "one" }, * // { "a": 1, "b": "two" }, * // { "a": 2, "b": "three" } * // ] * ``` */ toArrow(): arrow.Table; protected __constructChild

(name: P, col: Column): Series; /** * Drop rows containing nulls * @ignore */ protected _dropNullsRows(thresh?: number, subset?: readonly (string & keyof T)[]): DataFrame; /** * Drop rows with NaN values (float type only) * @ignore */ protected _dropNaNsRows(thresh?: number, subset?: readonly (string & keyof T)[]): DataFrame; /** * Drop columns with nulls * @ignore */ protected _dropNullsColumns(thresh?: number, subset?: Series): DataFrame; /** * Drop columns with NaN values (float type only) * @ignore */ protected _dropNaNsColumns(thresh?: number, subset?: Series, memoryResource?: MemoryResource): DataFrame; /** * Drops rows (or columns) containing nulls (*Note: only null values are dropped and not NaNs) * * @param axis Whether to drop rows (axis=0, default) or columns (axis=1) containing nulls * @param thresh drops every row (or column) containing fewer than thresh non-null values. * * thresh=1 (default) drops rows (or columns) containing all null values (non-null < thresh(1)). * * if axis = 0, thresh=df.numColumns: drops every row containing at least one null value * (non-null values in a row < thresh(df.numColumns)). * * if axis = 1, thresh=df.numRows: drops every column containing at least one null value * (non-null values in a column < thresh(df.numRows)). * * @param subset List of columns to consider when dropping rows (all columns are considered by * default). * Alternatively, when dropping columns, subset is a Series with indices to select rows * (all rows are considered by default). 
* @returns A new DataFrame with the rows (or columns) containing nulls removed * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, null, 2, null, 4, 4]), * b: Series.new([0, null, 2, 3, null, 4]), * c: Series.new([null, null, null, null, null, null]) * }); * * // delete rows with all nulls (default thresh=1) * df.dropNulls(0); * // return { * // a: [0, 2, null, 4, 4], b: [0, 2, 3, null, 4], * // c: [null, null, null, null, null] * // } * * // delete rows with at least one null * df.dropNulls(0, df.numColumns); * // returns empty df, since each row contains at least one null * * // delete columns with all nulls (default thresh=1) * df.dropNulls(1); * // returns {a: [0, null, 2, null, 4, 4], b: [0, null, 2, 3, null, 4]} * * // delete columns with at least one null * df.dropNulls(1, df.numRows); * // returns empty df, since each column contains at least one null * * ``` */ dropNulls(axis?: number, thresh?: number, subset?: (string & keyof T)[] | Series): DataFrame; /** * Drops rows (or columns) containing NaN, provided the columns are of type float * * @param axis Whether to drop rows (axis=0, default) or columns (axis=1) containing NaN * @param thresh drops every row (or column) containing fewer than thresh non-NaN values. * * thresh=1 (default) drops rows (or columns) containing all NaN values (non-NaN < thresh(1)). * * if axis = 0, thresh=df.numColumns: drops every row containing at least one NaN value (non-NaN * values in a row < thresh(df.numColumns)). * * if axis = 1, thresh=df.numRows: drops every column containing at least one NaN value * (non-NaN values in a column < thresh(df.numRows)). * @param subset List of float columns to consider when dropping rows (all float columns are * considered by default). * Alternatively, when dropping columns, subset is a Series with indices to select rows * (all rows are considered by default). 
* * @returns A new DataFrame with the rows (or columns) containing NaN removed * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, NaN, 2, NaN, 4, 4]), * b: Series.new([0, NaN, 2, 3, NaN, 4]), * c: Series.new([NaN, NaN, NaN, NaN, NaN, NaN]) * }); * * // delete rows with all NaNs (default thresh=1) * df.dropNaNs(0); * // return { * // a: [0, 2, NaN, 4, 4], b: [0, 2, 3, NaN, 4], * // c: [NaN, NaN, NaN, NaN, NaN] * // } * * // delete rows with at least one NaN * df.dropNaNs(0, df.numColumns); * // returns empty df, since each row contains at least one NaN * * // delete columns with all NaNs (default thresh=1) * df.dropNaNs(1); * // returns {a: [0, NaN, 2, NaN, 4, 4], b: [0, NaN, 2, 3, NaN, 4]} * * // delete columns with at least one NaN * df.dropNaNs(1, df.numRows); * // returns empty df, since each column contains at least one NaN * * ``` */ dropNaNs(axis?: number, thresh?: number, subset?: (string & keyof T)[] | Series): DataFrame; /** * Compute the trigonometric sine for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.sin(); * // return { * // a: [0, 0, 0], * // } * ``` */ sin

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the trigonometric cosine for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.cos(); * // return { * // a: [0, 1, 0], * // } * ``` */ cos

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the trigonometric tangent for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.tan(); * // return { * // a: [0, 0, 0], * // } * ``` */ tan

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the trigonometric sine inverse for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.asin(); * // return { * // a: [0, 0, 0], * // } * ``` */ asin

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the trigonometric cosine inverse for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.acos(); * // return { * // a: [0, 1, 0], * // } * ``` */ acos

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the trigonometric tangent inverse for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.atan(); * // return { * // a: [-1, 0, 1], * // } * ``` */ atan

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the hyperbolic sine for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.sinh(); * // return { * // a: [-10, 0, 10], * // } * ``` */ sinh

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the hyperbolic cosine for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.cosh(); * // return { * // a: [10, 1, 10], * // } * ``` */ cosh

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the hyperbolic tangent for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.tanh(); * // return { * // a: [0, 0, 0], * // } * ``` */ tanh

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the hyperbolic sine inverse for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.asinh(); * // return { * // a: [-1, 0, 1], * // } * ``` */ asinh

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the hyperbolic cosine inverse for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.acosh(); * // return { * // a: [0, 0, 1], * // } * ``` */ acosh

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the hyperbolic tangent inverse for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series, Int8} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new({type: new Int8, data: [-3, 0, 3]}) * }); * df.atanh(); * // return { * // a: [0, 0, 0], * // } * ``` */ atanh

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the exponential (base e, euler number) for all NumericSeries in the DataFrame * * @param memoryResource An optional MemoryResource used to allocate the result's device memory. * @returns A DataFrame with the operation performed on all NumericSeries * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1.2, 2.5]) * }); * df.exp(); * // return { * // a: [0.30119421191220214, 12.182493960703473], * // } * ``` */ exp

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the natural logarithm (base e) for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1.2, 2.5, 4]) * }); * df.log(); * // return { * // a: [NaN, 0.9162907318741551, 1.3862943611198906], * // } * ``` */ log

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the square-root (x^0.5) for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1.2, 2.5, 4]) * }); * df.sqrt(); * // return { * // a: [NaN, 1.5811388300841898, 2], * // } * ``` */ sqrt

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the cube-root (x^(1.0/3)) for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1.2, 2.5]) * }); * df.cbrt(); * // return { * // a: [-1.0626585691826111, 1.3572088082974534], * // } * ``` */ cbrt

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the smallest integer value not less than each value for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1.2, 2.5, -3, 4.6, 5]) * }); * df.ceil(); * // return { * // a: [-1, 3, -3, 5, 5], * // } * ``` */ ceil

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the largest integer value not greater than each value for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1.2, 2.5, -3, 4.6, 5]) * }); * df.floor(); * // return { * // a: [-2, 2, -3, 4, 5], * // } * ``` */ floor

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the absolute value for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([-1, 2, -3, 4, 5]) * }); * df.abs(); * // return { * // a: [1, 2, 3, 4, 5], * // } * ``` */ abs

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Compute the logical not (!) for all NumericSeries in the DataFrame * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns A DataFrame with the operation performed on all NumericSeries (the declared return type is `never` when `T[P]` is not Numeric) * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([0, 1, 2, 3, 4]) * }); * df.not(); * // return { * // a: [true, false, false, false, false], * // } * ``` */ not

(memoryResource?: MemoryResource): T[P] extends Numeric ? DataFrame : never; /** * Return a Series containing the unbiased kurtosis result for each Series in the * DataFrame. * * @param skipNulls Optional. Exclude NA/null values. If an entire row/column is NA, the result will be NA. * @returns A Series containing the unbiased kurtosis result for all Series in the DataFrame * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([1, 2, 3, 4]), * b: Series.new([7, 8, 9, 10]) * }); * df.kurtosis(); // [-1.1999999999999904, -1.2000000000000686] * ``` */ kurtosis

(skipNulls?: boolean): Series; /** * Return a Series containing the unbiased skew result for each Series in the * DataFrame. * * @param skipNulls Optional. Exclude NA/null values. If an entire row/column is NA, the result will be NA. * @returns A Series containing the unbiased skew result for all Series in the DataFrame * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([1, 2, 3, 4, 5, 6, 6]), * b: Series.new([7, 8, 9, 10, 11, 12, 12]) * }); * df.skew(); // [-0.288195490292614, -0.2881954902926153] * ``` */ skew

(skipNulls?: boolean): Series; /** * Compute the sum for all Series in the DataFrame. * * @param subset Optional list of columns to select (all columns are considered by * default). * @param skipNulls Optional. If true, NA and null values are dropped before computing the * reduction; if false, the reduction is computed directly. * @param memoryResource Optional memory resource used to allocate the result Column's device memory. * * @returns A Series containing the sum of all values for each Series * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([1, 2]), * b: Series.new([3.5, 4]) * }); * df.sum(); // [3, 7.5] * * const df2 = new DataFrame({ * a: Series.new(['foo', 'bar']), * b: Series.new([3, 4]) * }); * * df2.sum(); // returns `never` * ``` */ sum

(subset?: readonly P[], skipNulls?: boolean, memoryResource?: MemoryResource): Series; /** * Convert NaNs (if any) to nulls. * * @param subset Optional list of float columns to consider to replace NaNs with nulls. * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns DataFrame with NaNs (if any) converted to nulls * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, 2, 3, 4, 4]}), * b: Series.new({type: new Float32, data: [0, NaN, 2, 3, 4, 4]}) * }); * df.get("b").nullCount; // 0 * const df1 = df.nansToNulls(); * df1.get("b").nullCount; // 1 * ``` */ nansToNulls(subset?: (keyof T)[], memoryResource?: MemoryResource): DataFrame; /** * Creates a DataFrame replacing any FloatSeries with a Bool8Series where `true` indicates the * value is `NaN` and `false` indicates the value is valid. * * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns a DataFrame replacing instances of FloatSeries with a Bool8Series where `true` * indicates the value is `NaN` * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, null]}), * b: Series.new({type: new Float32, data: [0, NaN, 2]}) * }); * * df.isNaN() * // return { * // a: [0, 1, null], * // b: [false, true, false], * // } * ``` */ isNaN(memoryResource?: MemoryResource): DataFrame; /** * Creates a DataFrame of `BOOL8` Series where `true` indicates the value is null and * `false` indicates the value is valid.
* * @param memoryResource Optional memory resource used to allocate the result Columns' device memory. * * @returns a DataFrame containing Series of `BOOL8` where `true` indicates the value is null * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, null, 2]), * b: Series.new(['foo', 'bar', null]) * }); * * df.isNull() * // return { * // a: [false, true, false], * // b: [false, false, true], * // } * ``` */ isNull(memoryResource?: MemoryResource): DataFrame<{ [P in keyof T]: Bool8; }>; /** * Creates a DataFrame replacing any FloatSeries with a Bool8Series where `false` indicates the * value is `NaN` and `true` indicates the value is valid. * * @returns a DataFrame replacing instances of FloatSeries with a Bool8Series where `false` * indicates the value is `NaN` * * @example * ```typescript * import {DataFrame, Series, Int32, Float32} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new({type: new Int32, data: [0, 1, null]}), * b: Series.new({type: new Float32, data: [0, NaN, 2]}) * }); * * df.isNotNaN() * // return { * // a: [0, 1, null], * // b: [true, false, true], * // } * ``` */ isNotNaN(): DataFrame; /** * Creates a DataFrame of `BOOL8` Series where `false` indicates the value is null and * `true` indicates the value is valid. * * @returns a DataFrame containing Series of `BOOL8` where `false` indicates the value is null * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * const df = new DataFrame({ * a: Series.new([0, null, 2]), * b: Series.new(['foo', 'bar', null]) * }); * * df.isNotNull() * // return { * // a: [true, false, true], * // b: [true, true, false], * // } * ``` */ isNotNull(): DataFrame<{ [P in keyof T]: Bool8; }>; /** * Replace null values with a value. * * @param value The scalar value to use in place of nulls. * @param memoryResource The optional MemoryResource used to allocate the result Column's device * memory.
* * @returns a DataFrame with null values replaced by `value` * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([0, null, 2]), * b: Series.new([null, null, null]) * }); * * df.replaceNulls(1); * // return { * // a: [0, 1, 2], * // b: [1, 1, 1], * // } * ``` */ replaceNulls(value: R['scalarType'], memoryResource?: MemoryResource): DataFrame; /** * Replace null values with the corresponding elements from another Map of Series. * * @param value The map of Series to use in place of nulls. * @param memoryResource The optional MemoryResource used to allocate the result Column's device * memory. * * @returns a DataFrame with null values replaced by the corresponding elements of `value` * * @example * ```typescript * import {DataFrame, Series} from '@rapidsai/cudf'; * * const df = new DataFrame({ * a: Series.new([0, null, 2]), * b: Series.new([null, null, null]) * }); * * df.replaceNulls({'a': Series.new([0, 1, 2]), 'b': Series.new([1, 1, 1])}); * // return { * // a: [0, 1, 2], * // b: [1, 1, 1], * // } * ``` */ replaceNulls(value: SeriesMap, memoryResource?: MemoryResource): DataFrame; /** * Drops duplicate rows from a DataFrame * * @param keep Determines whether to keep the first, last, or none of the duplicate items. * @param nullsEqual Determines whether nulls are handled as equal values. * @param nullsFirst Determines whether null values are inserted before or after non-null * values. * @param subset List of columns to consider when dropping rows (all columns are considered by * default). * @param memoryResource Memory resource used to allocate the result Column's device memory. * * @returns a DataFrame without duplicate rows */ dropDuplicates(keep?: keyof typeof DuplicateKeepOption, nullsEqual?: boolean, nullsFirst?: boolean, subset?: readonly (string & keyof T)[], memoryResource?: MemoryResource): DataFrame; } export {}; //# sourceMappingURL=data_frame.d.ts.map