import { Writable } from "node:stream";
import { DataType, type DTypeToJs, type JsToDtype } from "./datatypes";
import { type GroupBy, RollingGroupBy } from "./groupby";
import { type LazyDataFrame } from "./lazy/dataframe";
import { Expr } from "./lazy/expr";
import { Series } from "./series";
import type { Arithmetic, Deserialize, GroupByOps, Sample, Serialize } from "./shared_traits";
import type { CrossJoinOptions, CsvWriterOptions, DifferentNameColumnJoinOptions, FillNullStrategy, JoinOptions, SameNameColumnJoinOptions, WriteAvroOptions, WriteIPCOptions, WriteParquetOptions } from "./types";
import { type ColumnSelection, type ColumnsOrExpr, type ExprOrString, type Simplify } from "./utils";
/**
 * Unique symbol used as the key of the `[inspect](): string` member on `DataFrame`.
 * NOTE(review): presumably backs custom console inspection — confirm against the implementation.
 */
declare const inspect: unique symbol;
/**
 * Default options applied by `writeCSV`.
 * NOTE(review): `Partial` carries no type argument in this copy of the file;
 * presumably `Partial<CsvWriterOptions>` — confirm against the original source.
 */
export declare const writeCsvDefaultOptions: Partial;
/**
 * Write methods for DataFrame.
 *
 * Every writer follows the same overload pattern: when a destination (file path or
 * `Writable`) is given the method returns `void`; when no destination is given the
 * serialized contents are returned as a `Buffer`.
 */
interface WriteMethods {
  /**
   * __Write DataFrame to comma-separated values file (csv).__
   *
   * If no destination is specified, a `Buffer` containing the CSV contents is returned.
   * ___
   * @param dest file or stream to write to
   * @param options.includeBom - Whether to include UTF-8 BOM in the CSV output.
   * @param options.lineTerminator - String used to end each row.
   * @param options.includeHeader - Whether or not to include header in the CSV output.
   * @param options.separator - Separate CSV fields with this symbol. Defaults: `,`
   * @param options.quoteChar - Character to use for quoting. Default: '"'
   * @param options.batchSize - Number of rows that will be processed per thread.
   * @param options.datetimeFormat - A format string, with the specifiers defined by the
   *    Rust `chrono` crate. If no format specified, the default fractional-second
   *    precision is inferred from the maximum timeunit found in the frame's
   *    Datetime cols (if any).
   * @param options.dateFormat - A format string, with the specifiers defined by the
   *    Rust `chrono` crate.
   * @param options.timeFormat - A format string, with the specifiers defined by the
   *    Rust `chrono` crate.
   * @param options.floatPrecision - Number of decimal places to write, applied to both `Float32` and `Float64` datatypes.
   * @param options.nullValue - A string representing null values (defaulting to the empty string).
   * @example
   * ```
   * > const df = pl.DataFrame({
   * ...   "foo": [1, 2, 3],
   * ...   "bar": [6, 7, 8],
   * ...   "ham": ['a', 'b', 'c']
   * ... });
   * > df.writeCSV();
   * foo,bar,ham
   * 1,6,a
   * 2,7,b
   * 3,8,c
   *
   * // using a file path
   * > df.head(1).writeCSV("./foo.csv")
   * // foo.csv
   * foo,bar,ham
   * 1,6,a
   *
   * // using a write stream
   * > const writeStream = new Stream.Writable({
   * ...   write(chunk, encoding, callback) {
   * ...     console.log("writeStream: %O", chunk.toString());
   * ...     callback(null);
   * ...   }
   * ... });
   * > df.head(1).writeCSV(writeStream, {includeHeader: false});
   * writeStream: '1,6,a'
   * ```
   * @category IO
   */
  writeCSV(dest: string | Writable, options?: CsvWriterOptions): void;
  writeCSV(): Buffer;
  writeCSV(options: CsvWriterOptions): Buffer;
  /**
   * Write DataFrame as JSON to a file or write stream, or return it as a `Buffer`
   * when no destination is given.
   * @param destination file or write stream
   * @param options.format - json | lines
   * @example
   * ```
   * > const df = pl.DataFrame({
   * ...   foo: [1,2,3],
   * ...   bar: ['a','b','c']
   * ... })
   *
   * > df.writeJSON({format:"json"})
   * `[ {"foo":1.0,"bar":"a"}, {"foo":2.0,"bar":"b"}, {"foo":3.0,"bar":"c"}]`
   *
   * > df.writeJSON({format:"lines"})
   * `{"foo":1.0,"bar":"a"}
   * {"foo":2.0,"bar":"b"}
   * {"foo":3.0,"bar":"c"}`
   *
   * // writing to a file
   * > df.writeJSON("/path/to/file.json", {format:'lines'})
   * ```
   * @category IO
   */
  writeJSON(destination: string | Writable, options?: { format: "lines" | "json"; }): void;
  writeJSON(options?: { format: "lines" | "json"; }): Buffer;
  /**
   * Write to Arrow IPC feather file, either to a file path or to a write stream.
   * Without a destination the contents are returned as a `Buffer`.
   * @param destination File path to which the file should be written, or writable.
   * @param options.compression Compression method *defaults to "uncompressed"*
   * @category IO
   */
  writeIPC(destination: string | Writable, options?: WriteIPCOptions): void;
  writeIPC(options?: WriteIPCOptions): Buffer;
  /**
   * Write to Arrow IPC stream file, either to a file path or to a write stream.
   * Without a destination the contents are returned as a `Buffer`.
   * @param destination File path to which the file should be written, or writable.
   * @param options.compression Compression method *defaults to "uncompressed"*
   * @category IO
   */
  writeIPCStream(destination: string | Writable, options?: WriteIPCOptions): void;
  writeIPCStream(options?: WriteIPCOptions): Buffer;
  /**
   * Write the DataFrame to disk in parquet format, either to a file path or to a write stream.
   * Without a destination the contents are returned as a `Buffer`.
   * @param destination File path to which the file should be written, or writable.
   * @param options.compression Compression method *defaults to "uncompressed"*
   * @category IO
   */
  writeParquet(destination: string | Writable, options?: WriteParquetOptions): void;
  writeParquet(options?: WriteParquetOptions): Buffer;
  /**
   * Write the DataFrame to disk in avro format, either to a file path or to a write stream.
   * Without a destination the contents are returned as a `Buffer`.
   * @param destination File path to which the file should be written, or writable.
   * @param options.compression Compression method *defaults to "uncompressed"*
   * @category IO
   */
  writeAvro(destination: string | Writable, options?: WriteAvroOptions): void;
  writeAvro(options?: WriteAvroOptions): Buffer;
}
/**
 * Schema of a DataFrame.
 * NOTE(review): the type arguments of `Record` are missing in this copy; presumably a
 * mapping from column name to `DataType` — confirm against the original source.
 */
export type Schema = Record;
/**
 * Maps every string key of `T` to a `Series`; non-string keys map to `never`.
 * NOTE(review): the generic parameter list and the `Series` type arguments are missing
 * in this copy — confirm against the original source.
 */
type SchemaToSeriesRecord> = { [K in keyof T]: K extends string ? Series : never; };
/**
 * Derives a schema from a record of array-like columns: for each string/number key whose
 * value is `ArrayLike`, yields `V` when `V` already extends `DataType`, otherwise
 * `JsToDtype`; every other case is `never`.
 * NOTE(review): `V` is presumably the inferred element type (`ArrayLike<infer V>`), but the
 * type arguments are missing in this copy — confirm.
 */
type ArrayLikeLooseRecordToSchema>> = { [K in keyof T]: K extends string | number ? T[K] extends ArrayLike ? V extends DataType ? V : JsToDtype : never : never; };
/**
 * Normalizes a join-key option: a `string[]` becomes the union of its elements
 * (`T[number]`); anything else is passed through unchanged.
 */
type ExtractJoinKeys = T extends string[] ? T[number] : T;
/**
 * Resolves to the type of `T`'s `suffix` member when present, otherwise to the literal
 * `"_right"`.
 */
type ExtractSuffix = T extends { suffix: infer Suffix; } ? Suffix : "_right";
/**
 * Result schema of a join, flattened through `Simplify`. It is the intersection of three
 * mapped types:
 *  1. every key of `S1` with its `S1` type;
 *  2. keys taken from an `Exclude` over (presumably) `S2` that are not already in `S1`,
 *     with their `S2` type;
 *  3. keys shared with `S1`, renamed to `${key}${suffix}` (suffix from `ExtractSuffix`)
 *     and carrying the `S2` type.
 * NOTE(review): the generic parameter list and the arguments of both `Exclude` uses are
 * missing in this copy, so exactly which keys are excluded (presumably the join keys
 * selected by the options type) cannot be confirmed from here.
 */
export type JoinSchemas = Simplify<{ [K1 in keyof S1]: S1[K1]; } & { [K2 in Exclude]: K2 extends keyof S1 ? never : S2[K2]; } & { [K_SUFFIXED in keyof S1 & Exclude : Opt extends DifferentNameColumnJoinOptions ? ExtractJoinKeys : never> as `${K_SUFFIXED extends string ? K_SUFFIXED : never}${ExtractSuffix}`]: K_SUFFIXED extends string ? S2[K_SUFFIXED] : never; }>;
/** * A DataFrame is a two-dimensional data structure that represents data as a table * with rows and columns. * * @param data - Object, Array, or Series * Two-dimensional data in various forms. object must contain Arrays. * Array may contain Series or other Arrays. * @param columns - Array of str, default undefined * Column labels to use for resulting DataFrame. If specified, overrides any * labels already present in the data. Must match data dimensions. * @param orient - 'col' | 'row' default undefined * Whether to interpret two-dimensional data as columns or as rows. If None, * the orientation is inferred by matching the columns and data dimensions. If * this does not yield conclusive results, column orientation is used. * @example * Constructing a DataFrame from an object : * ``` * > const data = {'a': [1n, 2n], 'b': [3, 4]}; * > const df = pl.DataFrame(data); * > console.log(df.toString()); * shape: (2, 2) * ╭─────┬─────╮ * │ a ┆ b │ * │ --- ┆ --- │ * │ u64 ┆ i64 │ * ╞═════╪═════╡ * │ 1 ┆ 3 │ * ├╌╌╌╌╌┼╌╌╌╌╌┤ * │ 2 ┆ 4 │ * ╰─────┴─────╯ * ``` * Notice that the dtype is automatically inferred as a polars Int64: * ``` * > df.dtypes * ['UInt64', `Int64'] * ``` * In order to specify dtypes for your columns, initialize the DataFrame with a list * of Series instead: * ``` * > const data = [pl.Series('col1', [1, 2], pl.Float32), pl.Series('col2', [3, 4], pl.Int64)]; * > const df2 = pl.DataFrame(series); * > console.log(df2.toString()); * shape: (2, 2) * ╭──────┬──────╮ * │ col1 ┆ col2 │ * │ --- ┆ --- │ * │ f32 ┆ i64 │ * ╞══════╪══════╡ * │ 1 ┆ 3 │ * ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 2 ┆ 4 │ * ╰──────┴──────╯ * ``` * * Constructing a DataFrame from a list of lists, row orientation inferred: * ``` * > const data = [[1, 2, 3], [4, 5, 6]]; * > const df4 = pl.DataFrame(data, ['a', 'b', 'c']); * > console.log(df4.toString()); * shape: 
(2, 3) * ╭─────┬─────┬─────╮ * │ a ┆ b ┆ c │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ i64 │ * ╞═════╪═════╪═════╡ * │ 1 ┆ 2 ┆ 3 │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 4 ┆ 5 ┆ 6 │ * ╰─────┴─────┴─────╯ * ``` */ export interface DataFrame extends Arithmetic>, Sample>, WriteMethods, Serialize, GroupByOps { /** @ignore */ _df: any; dtypes: DataType[]; height: number; shape: { height: number; width: number; }; width: number; get columns(): string[]; set columns(cols: string[]); [inspect](): string; [Symbol.iterator](): Generator; /** * Very cheap deep clone. */ clone(): DataFrame; /** * __Summary statistics for a DataFrame.__ * * Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes. * ___ * Example * ``` * > const df = pl.DataFrame({ * ... 'a': [1.0, 2.8, 3.0], * ... 'b': [4, 5, 6], * ... "c": [True, False, True] * ... }); * ... df.describe() * shape: (5, 4) * ╭──────────┬───────┬─────┬──────╮ * │ describe ┆ a ┆ b ┆ c │ * │ --- ┆ --- ┆ --- ┆ --- │ * │ str ┆ f64 ┆ f64 ┆ f64 │ * ╞══════════╪═══════╪═════╪══════╡ * │ "mean" ┆ 2.267 ┆ 5 ┆ null │ * ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ "std" ┆ 1.102 ┆ 1 ┆ null │ * ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ "min" ┆ 1 ┆ 4 ┆ 0.0 │ * ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ "max" ┆ 3 ┆ 6 ┆ 1 │ * ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ "median" ┆ 2.8 ┆ 5 ┆ null │ * ╰──────────┴───────┴─────┴──────╯ * ``` */ describe(): DataFrame; /** * __Remove column from DataFrame and return as new.__ * ___ * @param name * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'], * ... "apple": ['a', 'b', 'c'] * ... 
}); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // apple: pl.Series; * // }> * > const df2 = df.drop(['ham', 'apple']); * // df2: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // }> * > console.log(df2.toString()); * shape: (3, 2) * ╭─────┬─────╮ * │ foo ┆ bar │ * │ --- ┆ --- │ * │ i64 ┆ f64 │ * ╞═════╪═════╡ * │ 1 ┆ 6 │ * ├╌╌╌╌╌┼╌╌╌╌╌┤ * │ 2 ┆ 7 │ * ├╌╌╌╌╌┼╌╌╌╌╌┤ * │ 3 ┆ 8 │ * ╰─────┴─────╯ * ``` */ drop(name: U): DataFrame>>; drop(names: U): DataFrame>>; drop(name: U, ...names: V): DataFrame>>; /** * __Return a new DataFrame where the null values are dropped.__ * * This method only drops nulls row-wise if any single value of the row is null. * ___ * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, null, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > console.log(df.dropNulls().toString()); * shape: (2, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪═════╡ * │ 1 ┆ 6 ┆ "a" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 3 ┆ 8 ┆ "c" │ * └─────┴─────┴─────┘ * ``` */ dropNulls(column: keyof S): DataFrame; dropNulls(columns: (keyof S)[]): DataFrame; dropNulls(...columns: (keyof S)[]): DataFrame; /** * __Explode `DataFrame` to long format by exploding a column with Lists.__ * ___ * @param columns - column or columns to explode * @example * ``` * > const df = pl.DataFrame({ * ... "letters": ["c", "c", "a", "c", "a", "b"], * ... "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]] * ... 
}); * > console.log(df.toString()); * shape: (6, 2) * ╭─────────┬────────────╮ * │ letters ┆ nrs │ * │ --- ┆ --- │ * │ str ┆ list [i64] │ * ╞═════════╪════════════╡ * │ "c" ┆ [1, 2] │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ * │ "c" ┆ [1, 3] │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ * │ "a" ┆ [4, 3] │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ * │ "c" ┆ [5, 5, 5] │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ * │ "a" ┆ [6] │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ * │ "b" ┆ [2, 1, 2] │ * ╰─────────┴────────────╯ * > df.explode("nrs") * shape: (13, 2) * ╭─────────┬─────╮ * │ letters ┆ nrs │ * │ --- ┆ --- │ * │ str ┆ i64 │ * ╞═════════╪═════╡ * │ "c" ┆ 1 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 2 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 1 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 3 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ ... ┆ ... │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 5 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "a" ┆ 6 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "b" ┆ 2 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "b" ┆ 1 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "b" ┆ 2 │ * ╰─────────┴─────╯ * ``` */ explode(columns: ExprOrString[]): DataFrame; explode(column: ExprOrString): DataFrame; explode(column: ExprOrString, ...columns: ExprOrString[]): DataFrame; /** Extend the memory backed by this `DataFrame` with the values from `other`. Different from `vstack` which adds the chunks from `other` to the chunks of this `DataFrame` `extent` appends the data from `other` to the underlying memory locations and thus may cause a reallocation. If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries. Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during online operations where you add `n` rows and rerun a query. Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance when you read in multiple files and when to store them in a single `DataFrame`. In the latter case, finish the sequence of `vstack` operations with a `rechunk`. * @param other DataFrame to vertically add. 
* @returns This method modifies the dataframe in-place. The dataframe is returned for convenience only. * @see {@link vstack} * @example * ``` const df1 = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}); const df2 = pl.DataFrame({"foo": [10, 20, 30], "bar": [40, 50, 60]}); >>> df1.extend(df2) shape: (6, 2) ┌─────┬─────┐ │ foo ┆ bar │ │ --- ┆ --- │ │ i64 ┆ i64 │ ╞═════╪═════╡ │ 1 ┆ 4 │ │ 2 ┆ 5 │ │ 3 ┆ 6 │ │ 10 ┆ 40 │ │ 20 ┆ 50 │ │ 30 ┆ 60 │ └─────┴─────┘ * ``` */
  extend(other: DataFrame): DataFrame;
  /**
   * Fill null/missing values by a filling strategy
   *
   * @param strategy - One of:
   *   - "backward"
   *   - "forward"
   *   - "mean"
   *   - "min"
   *   - "max"
   *   - "zero"
   *   - "one"
   * @returns DataFrame with null values replaced according to the filling strategy.
   */
  fillNull(strategy: FillNullStrategy): DataFrame;
  /**
   * Filter the rows in the DataFrame based on a predicate expression.
   * ___
   * @param predicate - Expression that evaluates to a boolean Series.
   * @example
   * ```
   * > const df = pl.DataFrame({
   * ...   "foo": [1, 2, 3],
   * ...   "bar": [6, 7, 8],
   * ...   "ham": ['a', 'b', 'c']
   * ... });
   * // Filter on one condition
   * > df.filter(pl.col("foo").lt(3))
   * shape: (2, 3)
   * ┌─────┬─────┬─────┐
   * │ foo ┆ bar ┆ ham │
   * │ --- ┆ --- ┆ --- │
   * │ i64 ┆ i64 ┆ str │
   * ╞═════╪═════╪═════╡
   * │ 1 ┆ 6 ┆ a │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
   * │ 2 ┆ 7 ┆ b │
   * └─────┴─────┴─────┘
   * // Filter on multiple conditions
   * > df.filter(
   * ...   pl.col("foo").lt(3)
   * ...     .and(pl.col("ham").eq(pl.lit("a")))
   * ... )
   * shape: (1, 3)
   * ┌─────┬─────┬─────┐
   * │ foo ┆ bar ┆ ham │
   * │ --- ┆ --- ┆ --- │
   * │ i64 ┆ i64 ┆ str │
   * ╞═════╪═════╪═════╡
   * │ 1 ┆ 6 ┆ a │
   * └─────┴─────┴─────┘
   * ```
   */
  filter(predicate: any): DataFrame;
  /**
   * Find the index of a column by name.
   * ___
   * @param name - Name of the column to find.
   * @returns Index of the column, as a number.
   * @example
   * ```
   * > const df = pl.DataFrame({
   * ...   "foo": [1, 2, 3],
   * ...   "bar": [6, 7, 8],
   * ...   "ham": ['a', 'b', 'c']
   * ... });
   * > df.findIdxByName("ham")
   * 2
   * ```
   */
  findIdxByName(name: keyof S): number;
  /**
   * __Apply a horizontal reduction on a DataFrame.__
   *
   * This can be used to effectively determine aggregations on a row level,
   * and can be applied to any DataType that can be supercasted (casted to a similar parent type).
   *
   * An example of the supercast rules when applying an arithmetic operation on two DataTypes are for instance:
   *  - Int8 + Utf8 = Utf8
   *  - Float32 + Int64 = Float32
   *  - Float32 + Float64 = Float64
   * ___
   * @param operation - function that takes two `Series` and returns a `Series`.
   * @returns Series
   * @example
   * ```
   * > // A horizontal sum operation
   * > let df = pl.DataFrame({
   * ...   "a": [2, 1, 3],
   * ...   "b": [1, 2, 3],
   * ...   "c": [1.0, 2.0, 3.0]
   * ... });
   * > df.fold((s1, s2) => s1.plus(s2))
   * Series: 'a' [f64]
   * [
   *     4
   *     5
   *     9
   * ]
   * > // A horizontal minimum operation
   * > df = pl.DataFrame({
   * ...   "a": [2, 1, 3],
   * ...   "b": [1, 2, 3],
   * ...   "c": [1.0, 2.0, 3.0]
   * ... });
   * > df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2))
   * Series: 'a' [f64]
   * [
   *     1
   *     1
   *     3
   * ]
   * > // A horizontal string concatenation
   * > // NOTE(review): the `[f64]` dtype printed below looks inconsistent with the
   * > // string values shown — verify against actual output.
   * > df = pl.DataFrame({
   * ...   "a": ["foo", "bar", 2],
   * ...   "b": [1, 2, 3],
   * ...   "c": [1.0, 2.0, 3.0]
   * ... })
   * > df.fold((s1, s2) => s1.plus(s2))
   * Series: '' [f64]
   * [
   *     "foo11"
   *     "bar22"
   *     "233"
   * ]
   * ```
   */
  fold[keyof S] | Series, s2: SchemaToSeriesRecord[keyof S]) => Series>(operation: F): Series;
  /** * Check if DataFrame is equal to other. * ___ * @param other DataFrame to compare. * @param nullEqual Consider null values as equal. * @example * ``` * > const df1 = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'] * ... }) * > const df2 = pl.DataFrame({ * ... "foo": [3, 2, 1], * ... "bar": [8.0, 7.0, 6.0], * ... "ham": ['c', 'b', 'a'] * ... 
}) * > df1.frameEqual(df1) * true * > df1.frameEqual(df2) * false * ``` */ frameEqual(other: DataFrame, nullEqual: boolean): boolean; frameEqual(other: DataFrame): boolean; /** * Get a single column as Series by name. * * --- * @example * ``` * > const df = pl.DataFrame({ * ... foo: [1, 2, 3], * ... bar: [6, null, 8], * ... ham: ["a", "b", "c"], * ... }); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > const column = df.getColumn("foo"); * // column: pl.Series * ``` */ getColumn(name: U): SchemaToSeriesRecord[U]; getColumn(name: string): Series; /** * Get the DataFrame as an Array of Series. * --- * @example * ``` * > const df = pl.DataFrame({ * ... foo: [1, 2, 3], * ... bar: [6, null, 8], * ... ham: ["a", "b", "c"], * ... }); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > const columns = df.getColumns(); * // columns: (pl.Series | pl.Series | pl.Series)[] * ``` */ getColumns(): SchemaToSeriesRecord[keyof S][]; /** * Start a groupby operation. * ___ * @param by - Column(s) to group by. */ groupBy(...by: ColumnSelection[]): GroupBy; /** * Hash and combine the rows in this DataFrame. _(Hash value is UInt64)_ * @param k0 - seed parameter * @param k1 - seed parameter * @param k2 - seed parameter * @param k3 - seed parameter */ hashRows(k0?: number, k1?: number, k2?: number, k3?: number): Series; hashRows(options: { k0?: number; k1?: number; k2?: number; k3?: number; }): Series; /** * Get first N rows as DataFrame. * ___ * @param length - Length of the head. * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3, 4, 5], * ... "bar": [6, 7, 8, 9, 10], * ... "ham": ['a', 'b', 'c', 'd','e'] * ... 
}); * > df.head(3) * shape: (3, 3) * ╭─────┬─────┬─────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪═════╡ * │ 1 ┆ 6 ┆ "a" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 3 ┆ 8 ┆ "c" │ * ╰─────┴─────┴─────╯ * ``` */ head(length?: number): DataFrame; /** * Return a new DataFrame grown horizontally by stacking multiple Series to it. * @param columns - array of Series or DataFrame to stack * @param inPlace - Modify in place * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > const x = pl.Series("apple", [10, 20, 30]) * // x: pl.Series * > df.hstack([x]) * // pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // apple: pl.Series; * // }> * shape: (3, 4) * ╭─────┬─────┬─────┬───────╮ * │ foo ┆ bar ┆ ham ┆ apple │ * │ --- ┆ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str ┆ i64 │ * ╞═════╪═════╪═════╪═══════╡ * │ 1 ┆ 6 ┆ "a" ┆ 10 │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" ┆ 20 │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤ * │ 3 ┆ 8 ┆ "c" ┆ 30 │ * ╰─────┴─────┴─────┴───────╯ * ``` */ hstack(columns: Array | DataFrame, inPlace?: boolean): void; hstack(columns: DataFrame): DataFrame>; hstack(columns: U): DataFrame>; hstack(columns: Array | DataFrame, inPlace?: boolean): void; /** * Insert a Series at a certain column index. This operation is in place. * @param index - Column position to insert the new `Series` column. * @param series - `Series` to insert */ insertAtIdx(index: number, series: Series): void; /** * Interpolate intermediate values. The interpolation method is linear. */ interpolate(): DataFrame; /** * Get a mask of all duplicated rows in this DataFrame. */ isDuplicated(): Series; /** * Check if the dataframe is empty */ isEmpty(): boolean; /** * Get a mask of all unique rows in this DataFrame. 
*/ isUnique(): Series; /** * __SQL like joins.__ * @param other - DataFrame to join with. * @param options * @param options.on - Name(s) of the join columns in both DataFrames. * @param options.how - Join strategy {'inner', 'left', 'right', 'full', 'semi', 'anti', 'cross'} * @param options.suffix - Suffix to append to columns with a duplicate name. * @param options.coalesce - Coalescing behavior (merging of join columns). default: undefined * - **undefined** - *(Default)* Coalesce unless `how='full'` is specified. * - **true** - Always coalesce join columns. * - **false** - Never coalesce join columns. * @param options.validate - Checks if join is of specified type. default: m:m valid options: {'m:m', 'm:1', '1:m', '1:1'} * - **m:m** - *(Default)* Many-to-many (default). Does not result in checks. * - **1:1** - One-to-one. Checks if join keys are unique in both left and right datasets. * - **1:m** - One-to-many. Checks if join keys are unique in left dataset. * - **m:1** - Many-to-one. Check if join keys are unique in right dataset. * @see {@link SameNameColumnJoinOptions} * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'] * ... }); * > const otherDF = pl.DataFrame({ * ... "apple": ['x', 'y', 'z'], * ... "ham": ['a', 'b', 'd'] * ... }); * > df.join(otherDF, {on: 'ham'}) * shape: (2, 4) * ╭─────┬─────┬─────┬───────╮ * │ foo ┆ bar ┆ ham ┆ apple │ * │ --- ┆ --- ┆ --- ┆ --- │ * │ i64 ┆ f64 ┆ str ┆ str │ * ╞═════╪═════╪═════╪═══════╡ * │ 1 ┆ 6 ┆ "a" ┆ "x" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" ┆ "y" │ * ╰─────┴─────┴─────┴───────╯ * ``` */ join, Extract>>(other: DataFrame, options: Opts & SameNameColumnJoinOptions): DataFrame>; /** * __SQL like joins with different names for left and right dataframes.__ * @param other - DataFrame to join with. * @param options * @param options.leftOn - Name(s) of the left join column(s). * @param options.rightOn - Name(s) of the right join column(s). 
* @param options.how - Join strategy * @param options.suffix - Suffix to append to columns with a duplicate name. * @param options.coalesce - Coalescing behavior (merging of join columns). * @see {@link DifferentNameColumnJoinOptions} * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'] * ... }); * > const otherDF = pl.DataFrame({ * ... "apple": ['x', 'y', 'z'], * ... "ham": ['a', 'b', 'd'] * ... }); * > df.join(otherDF, {leftOn: 'ham', rightOn: 'ham'}) * shape: (2, 4) * ╭─────┬─────┬─────┬───────╮ * │ foo ┆ bar ┆ ham ┆ apple │ * │ --- ┆ --- ┆ --- ┆ --- │ * │ i64 ┆ f64 ┆ str ┆ str │ * ╞═════╪═════╪═════╪═══════╡ * │ 1 ┆ 6 ┆ "a" ┆ "x" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" ┆ "y" │ * ╰─────┴─────┴─────┴───────╯ * ``` */ join, Extract>>(other: DataFrame, options: Opts & DifferentNameColumnJoinOptions): DataFrame>; /** * __SQL like cross joins.__ * @param other - DataFrame to join with. * @param options * @param options.how - Join strategy * @param options.suffix - Suffix to append to columns with a duplicate name. * @param options.coalesce - Coalescing behavior (merging of join columns). * @see {@link CrossJoinOptions} * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2], * ... "bar": [6.0, 7.0], * ... "ham": ['a', 'b'] * ... }); * > const otherDF = pl.DataFrame({ * ... "apple": ['x', 'y'], * ... "ham": ['a', 'b'] * ... }); * > df.join(otherDF, {how: 'cross'}) * shape: (4, 5) * ╭─────┬─────┬─────┬───────┬───────────╮ * │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │ * │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ * │ f64 ┆ f64 ┆ str ┆ str ┆ str │ * ╞═════╪═════╪═════╪═══════╪═══════════╡ * │ 1.0 ┆ 6.0 ┆ a ┆ x ┆ a │ * │ 1.0 ┆ 6.0 ┆ a ┆ y ┆ b │ * │ 2.0 ┆ 7.0 ┆ b ┆ x ┆ a │ * │ 2.0 ┆ 7.0 ┆ b ┆ y ┆ b │ * ╰─────┴─────┴─────┴───────┴───────────╯ * ``` */ join(other: DataFrame, options: Opts & CrossJoinOptions): DataFrame>; /** * Perform an asof join. 
This is similar to a left-join except that we * match on nearest key rather than equal keys. * * Both DataFrames must be sorted by the asofJoin key. * * For each row in the left DataFrame: * - A "backward" search selects the last row in the right DataFrame whose * 'on' key is less than or equal to the left's key. * * - A "forward" search selects the first row in the right DataFrame whose * 'on' key is greater than or equal to the left's key. * * - A "nearest" search selects the last row in the right DataFrame whose value * is nearest to the left's key. String keys are not currently supported for a * nearest search. * * The default is "backward". * * @param other DataFrame to join with. * @param options.leftOn Join column of the left DataFrame. * @param options.rightOn Join column of the right DataFrame. * @param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined. * @param options.byLeft join on these columns before doing asof join * @param options.byRight join on these columns before doing asof join * @param options.strategy One of 'forward', 'backward', 'nearest' * @param options.suffix Suffix to append to columns with a duplicate name. * @param options.tolerance * Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. * If an asof join is done on columns of dtype "Date", "Datetime" you * use the following string language: * * - 1ns *(1 nanosecond)* * - 1us *(1 microsecond)* * - 1ms *(1 millisecond)* * - 1s *(1 second)* * - 1m *(1 minute)* * - 1h *(1 hour)* * - 1d *(1 day)* * - 1w *(1 week)* * - 1mo *(1 calendar month)* * - 1y *(1 calendar year)* * - 1i *(1 index count)* * * Or combine them: * - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds * @param options.allowParallel Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel. 
* @param options.forceParallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. * @param options.checkSortedness * Check the sortedness of the asof keys. If the keys are not sorted Polars * will error, or in case of 'by' argument raise a warning. This might become * a hard error in the future. * * @example * ``` * > const gdp = pl.DataFrame({ * ... date: [ * ... new Date('2016-01-01'), * ... new Date('2017-01-01'), * ... new Date('2018-01-01'), * ... new Date('2019-01-01'), * ... ], // note record date: Jan 1st (sorted!) * ... gdp: [4164, 4411, 4566, 4696], * ... }) * > const population = pl.DataFrame({ * ... date: [ * ... new Date('2016-05-12'), * ... new Date('2017-05-12'), * ... new Date('2018-05-12'), * ... new Date('2019-05-12'), * ... ], // note record date: May 12th (sorted!) * ... "population": [82.19, 82.66, 83.12, 83.52], * ... }) * > population.joinAsof( * ... gdp, * ... {leftOn:"date", rightOn:"date", strategy:"backward"} * ... ) * shape: (4, 3) * ┌─────────────────────┬────────────┬──────┐ * │ date ┆ population ┆ gdp │ * │ --- ┆ --- ┆ --- │ * │ datetime[μs] ┆ f64 ┆ i64 │ * ╞═════════════════════╪════════════╪══════╡ * │ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │ * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │ * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │ * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │ * └─────────────────────┴────────────┴──────┘ * ``` */ joinAsof(other: DataFrame, options: { leftOn?: string; rightOn?: string; on?: string; byLeft?: string | string[]; byRight?: string | string[]; by?: string | string[]; strategy?: "backward" | "forward" | "nearest"; suffix?: string; tolerance?: number | string; allowParallel?: boolean; forceParallel?: boolean; checkSortedness?: boolean; }): DataFrame; lazy(): LazyDataFrame; /** * Get first N rows as DataFrame. 
* @see {@link head} */ limit(length?: number): DataFrame; map(func: (row: any[], i: number, arr: any[][]) => ReturnT): ReturnT[]; /** * Aggregate the columns of this DataFrame to their maximum value. * ___ * @param axis - either 0 or 1 * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.max() * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪══════╡ * │ 3 ┆ 8 ┆ null │ * ╰─────┴─────┴──────╯ * ``` */ max(axis: 0): DataFrame; max(axis: 1): Series; max(): DataFrame; /** * Aggregate the columns of this DataFrame to their mean value. * ___ * * @param axis - either 0 or 1 * @param nullStrategy - this argument is only used if axis == 1 */ mean(axis: 1, nullStrategy?: "ignore" | "propagate"): Series; mean(): DataFrame; mean(axis: 0): DataFrame; mean(axis: 1): Series; /** * Aggregate the columns of this DataFrame to their median value. * ___ * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.median(); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ f64 ┆ f64 ┆ str │ * ╞═════╪═════╪══════╡ * │ 2 ┆ 7 ┆ null │ * ╰─────┴─────┴──────╯ * ``` */ median(): DataFrame; /** * Unpivot a DataFrame from wide to long format. * ___ * * @param idVars - Columns to use as identifier variables. * @param valueVars - Values to use as value variables. * @param options.variableName - Name to give to the `variable` column. Defaults to "variable" * @param options.valueName - Name to give to the `value` column. Defaults to "value" * @example * ``` * > const df1 = pl.DataFrame({ * ... 'id': [1], * ... 'asset_key_1': ['123'], * ... 'asset_key_2': ['456'], * ... 'asset_key_3': ['abc'], * ... 
}); * > df1.unpivot('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']); * shape: (3, 3) * ┌─────┬─────────────┬───────┐ * │ id ┆ variable ┆ value │ * │ --- ┆ --- ┆ --- │ * │ f64 ┆ str ┆ str │ * ╞═════╪═════════════╪═══════╡ * │ 1 ┆ asset_key_1 ┆ 123 │ * ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ * │ 1 ┆ asset_key_2 ┆ 456 │ * ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ * │ 1 ┆ asset_key_3 ┆ abc │ * └─────┴─────────────┴───────┘ * ``` */ unpivot(idVars: ColumnSelection, valueVars: ColumnSelection, options?: { variableName?: string | null; valueName?: string | null; }): DataFrame; /** * Aggregate the columns of this DataFrame to their minimum value. * ___ * @param axis - either 0 or 1 * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.min(); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪══════╡ * │ 1 ┆ 6 ┆ null │ * ╰─────┴─────┴──────╯ * ``` */ min(axis: 0): DataFrame; min(axis: 1): Series; min(): DataFrame; /** * Get number of chunks used by the ChunkedArrays of this DataFrame. */ nChunks(): number; /** * Create a new DataFrame that shows the null counts per column. * ___ * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, null, 3], * ... "bar": [6, 7, null], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.nullCount(); * shape: (1, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ u32 ┆ u32 ┆ u32 │ * ╞═════╪═════╪═════╡ * │ 1 ┆ 1 ┆ 0 │ * └─────┴─────┴─────┘ * ``` */ nullCount(): DataFrame<{ [K in keyof S]: JsToDtype; }>; partitionBy(cols: string | string[], stable?: boolean, includeKey?: boolean): DataFrame[]; partitionBy(cols: string | string[], stable: boolean, includeKey: boolean, mapFn: (df: DataFrame) => T): T[]; /** * Create a spreadsheet-style pivot table as a DataFrame. * * @param values The existing column(s) of values which will be moved under the new columns from index. 
If an * aggregation is specified, these are the values on which the aggregation will be computed. * If None, all remaining columns not specified on `on` and `index` will be used. * At least one of `index` and `values` must be specified. * @param options.index The column(s) that remain from the input to the output. The output DataFrame will have one row * for each unique combination of the `index`'s values. * If None, all remaining columns not specified on `on` and `values` will be used. At least one * of `index` and `values` must be specified. * @param options.on The column(s) whose values will be used as the new columns of the output DataFrame. * @param options.aggregateFunc * Any of: * - "sum" * - "max" * - "min" * - "mean" * - "median" * - "first" * - "last" * - "count" * Defaults to "first" * @param options.maintainOrder Sort the grouped keys so that the output order is predictable. * @param options.sortColumns Sort the transposed columns by name. Default is by order of discovery. * @param options.separator Used as separator/delimiter in generated column names. * @example * ``` * > const df = pl.DataFrame( * ... { * ... "foo": ["one", "one", "one", "two", "two", "two"], * ... "bar": ["A", "B", "C", "A", "B", "C"], * ... "baz": [1, 2, 3, 4, 5, 6], * ... } * ... 
); * > df.pivot("baz", {index:"foo", on:"bar"}); * shape: (2, 4) * ┌─────┬─────┬─────┬─────┐ * │ foo ┆ A ┆ B ┆ C │ * │ --- ┆ --- ┆ --- ┆ --- │ * │ str ┆ f64 ┆ f64 ┆ f64 │ * ╞═════╪═════╪═════╪═════╡ * │ one ┆ 1 ┆ 2 ┆ 3 │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ two ┆ 4 ┆ 5 ┆ 6 │ * └─────┴─────┴─────┴─────┘ * ``` */ pivot(values: string | string[], options: { index: string | string[]; on: string | string[]; aggregateFunc?: "sum" | "max" | "min" | "mean" | "median" | "first" | "last" | "count" | Expr; maintainOrder?: boolean; sortColumns?: boolean; separator?: string; }): DataFrame; pivot(options: { values: string | string[]; index: string | string[]; on: string | string[]; aggregateFunc?: "sum" | "max" | "min" | "mean" | "median" | "first" | "last" | "count" | Expr; maintainOrder?: boolean; sortColumns?: boolean; separator?: string; }): DataFrame; /** * Aggregate the columns of this DataFrame to their quantile value. * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.quantile(0.5); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪══════╡ * │ 2 ┆ 7 ┆ null │ * ╰─────┴─────┴──────╯ * ``` */ quantile(quantile: number): DataFrame; /** * __Rechunk the data in this DataFrame to a contiguous allocation.__ * * This will make sure all subsequent operations have optimal and predictable performance. */ rechunk(): DataFrame; /** * __Rename column names.__ * ___ * * @param mapping - Key value pairs that map from old name to new name. * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... 
}); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > df.rename({"foo": "apple"}); * ╭───────┬─────┬─────╮ * │ apple ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═══════╪═════╪═════╡ * │ 1 ┆ 6 ┆ "a" │ * ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" │ * ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 3 ┆ 8 ┆ "c" │ * ╰───────┴─────┴─────╯ * ``` */ rename>>(mapping: U): DataFrame<{ [K in keyof S as U[K] extends string ? U[K] : K]: S[K]; }>; rename(mapping: Record): DataFrame; /** * Replace a column at an index location. * * Warning: typescript cannot encode type mutation, * so the type of the DataFrame will be incorrect. cast the type of dataframe manually. * ___ * @param index - Column index * @param newColumn - New column to insert * @example * ``` * > const df: pl.DataFrame = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > const x = pl.Series("apple", [10, 20, 30]); * // x: pl.Series * > df.replaceAtIdx(0, x); * // df: pl.DataFrame<{ * // foo: pl.Series; <- notice how the type is still the same! * // bar: pl.Series; * // ham: pl.Series; * // }> * shape: (3, 3) * ╭───────┬─────┬─────╮ * │ apple ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═══════╪═════╪═════╡ * │ 10 ┆ 6 ┆ "a" │ * ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 20 ┆ 7 ┆ "b" │ * ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 30 ┆ 8 ┆ "c" │ * ╰───────┴─────┴─────╯ * ``` */ replaceAtIdx(index: number, newColumn: Series): void; /** * Get a row as Array * @param index - row index * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.row(2) * [3, 8, 'c'] * ``` */ row(index: number): Array; /** * Convert columnar data to rows as arrays */ rows(): Array>; /** * @example * ``` * > const df: pl.DataFrame = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... 
"bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > df.schema * // { * // foo: Float64; * // bar: Float64; * // ham: Utf8; * // } * ``` */ get schema(): S; /** * Select columns from this DataFrame. * ___ * @param columns - Column or columns to select. * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * // df: pl.DataFrame<{ * // foo: pl.Series; * // bar: pl.Series; * // ham: pl.Series; * // }> * > df.select('foo'); * // pl.DataFrame<{ * // foo: pl.Series; * // }> * shape: (3, 1) * ┌─────┐ * │ foo │ * │ --- │ * │ i64 │ * ╞═════╡ * │ 1 │ * ├╌╌╌╌╌┤ * │ 2 │ * ├╌╌╌╌╌┤ * │ 3 │ * └─────┘ * ``` */ select(...columns: U[]): DataFrame<{ [P in U]: S[P]; }>; select(...columns: ExprOrString[]): DataFrame; /** * Shift the values by a given period and fill the parts that will be empty due to this operation * with `Nones`. * ___ * @param periods - Number of places to shift (may be negative). * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... 
}); * > df.shift(1); * shape: (3, 3) * ┌──────┬──────┬──────┐ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞══════╪══════╪══════╡ * │ null ┆ null ┆ null │ * ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 1 ┆ 6 ┆ "a" │ * ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" │ * └──────┴──────┴──────┘ * > df.shift(-1) * shape: (3, 3) * ┌──────┬──────┬──────┐ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞══════╪══════╪══════╡ * │ 2 ┆ 7 ┆ "b" │ * ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 3 ┆ 8 ┆ "c" │ * ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ null ┆ null ┆ null │ * └──────┴──────┴──────┘ * ``` */ shift(periods: number): DataFrame; shift({ periods }: { periods: number; }): DataFrame; /** * Shift the values by a given period and fill the parts that will be empty due to this operation * with the result of the `fillValue` expression. * ___ * @param n - Number of places to shift (may be negative). * @param fillValue - fill null values with this value. * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.shiftAndFill({n:1, fillValue:0}); * shape: (3, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪═════╡ * │ 0 ┆ 0 ┆ "0" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 1 ┆ 6 ┆ "a" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 2 ┆ 7 ┆ "b" │ * └─────┴─────┴─────┘ * ``` */ shiftAndFill(n: number, fillValue: number): DataFrame; shiftAndFill({ n, fillValue, }: { n: number; fillValue: number; }): DataFrame; /** * Shrink memory usage of this DataFrame to fit the exact capacity needed to hold the data. */ shrinkToFit(): DataFrame; shrinkToFit(inPlace: true): void; shrinkToFit({ inPlace }: { inPlace: true; }): void; /** * Slice this DataFrame over the rows direction. * ___ * @param opts * @param opts.offset - Offset index. * @param opts.length - Length of the slice * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... 
"ham": ['a', 'b', 'c'] * ... }); * > df.slice(1, 2); // Alternatively `df.slice({offset:1, length:2})` * shape: (2, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ i64 ┆ i64 ┆ str │ * ╞═════╪═════╪═════╡ * │ 2 ┆ 7 ┆ "b" │ * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ * │ 3 ┆ 8 ┆ "c" │ * └─────┴─────┴─────┘ * ``` */ slice({ offset, length }: { offset: number; length: number; }): DataFrame; slice(offset: number, length: number): DataFrame; /** * Sort the DataFrame by column. * ___ * @param by - Column(s) to sort by. Accepts expression input, including selectors. Strings are parsed as column names. * @param descending - Sort in descending order. When sorting by multiple columns, can be specified per column by passing a sequence of booleans. * @param nullsLast - Place null values last; can specify a single boolean applying to all columns or a sequence of booleans for per-column control. * @param maintainOrder - Whether the order should be maintained if elements are equal. */ sort(by: ColumnsOrExpr, descending?: boolean, nullsLast?: boolean, maintainOrder?: boolean): DataFrame; sort({ by, descending, maintainOrder, }: { by: ColumnsOrExpr; descending?: boolean; nullsLast?: boolean; maintainOrder?: boolean; }): DataFrame; /** * Aggregate the columns of this DataFrame to their standard deviation value. * ___ * @example * ``` * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); * > df.std(); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ f64 ┆ f64 ┆ str │ * ╞═════╪═════╪══════╡ * │ 1 ┆ 1 ┆ null │ * ╰─────┴─────┴──────╯ * ``` */ std(): DataFrame; /** * Aggregate the columns of this DataFrame to their sum value. 
* ___ * * @param axis - either 0 or 1 * @param nullStrategy - this argument is only used if axis == 1 */ sum(axis: 1, nullStrategy?: "ignore" | "propagate"): Series; sum(): DataFrame; sum(axis: 0): DataFrame; sum(axis: 1): Series; /** * @example * ``` * > const df = pl.DataFrame({ * ... "letters": ["c", "c", "a", "c", "a", "b"], * ... "nrs": [1, 2, 3, 4, 5, 6] * ... }); * > console.log(df.toString()); * shape: (6, 2) * ╭─────────┬─────╮ * │ letters ┆ nrs │ * │ --- ┆ --- │ * │ str ┆ i64 │ * ╞═════════╪═════╡ * │ "c" ┆ 1 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 2 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "a" ┆ 3 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 4 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "a" ┆ 5 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "b" ┆ 6 │ * ╰─────────┴─────╯ * > df.groupby("letters") * ... .tail(2) * ... .sort("letters") * shape: (5, 2) * ╭─────────┬─────╮ * │ letters ┆ nrs │ * │ --- ┆ --- │ * │ str ┆ i64 │ * ╞═════════╪═════╡ * │ "a" ┆ 3 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "a" ┆ 5 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "b" ┆ 6 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 2 │ * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤ * │ "c" ┆ 4 │ * ╰─────────┴─────╯ * ``` */ tail(length?: number): DataFrame; /** * Converts dataframe object into row oriented javascript objects * @example * ``` * > df.toRecords() * [ * {"foo":1.0,"bar":"a"}, * {"foo":2.0,"bar":"b"}, * {"foo":3.0,"bar":"c"} * ] * ``` * @category IO */ toRecords(): { [K in keyof S]: DTypeToJs | null; }[]; /** * Converts dataframe object into a {@link TabularDataResource} */ toDataResource(): TabularDataResource; /** * Converts dataframe object into HTML */ toHTML(): string; /** * Converts dataframe object into column oriented javascript objects * @example * ``` * > df.toObject() * { * "foo": [1,2,3], * "bar": ["a", "b", "c"] * } * ``` * @category IO */ toObject(): { [K in keyof S]: DTypeToJs[]; }; toSeries(index?: number): SchemaToSeriesRecord[keyof S]; toString(): string; /** * Convert a ``DataFrame`` to a ``Series`` of type ``Struct`` * @param name Name for the struct Series * @example * ``` * > const df = pl.DataFrame({ * 
... "a": [1, 2, 3, 4, 5], * ... "b": ["one", "two", "three", "four", "five"], * ... }); * > df.toStruct("nums"); * shape: (5,) * Series: 'nums' [struct[2]{'a': i64, 'b': str}] * [ * {1,"one"} * {2,"two"} * {3,"three"} * {4,"four"} * {5,"five"} * ] * ``` */ toStruct(name: string): Series; /** * Transpose a DataFrame over the diagonal. * * @remarks This is a very expensive operation. Perhaps you can do it differently. * @param options * @param options.includeHeader If set, the column names will be added as first column. * @param options.headerName If `includeHeader` is set, this determines the name of the column that will be inserted * @param options.columnNames Optional generator/iterator that yields column names. Will be used to replace the columns in the DataFrame. * * @example * > let df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}); * > df.transpose({includeHeader:true}) shape: (2, 4) ┌────────┬──────────┬──────────┬──────────┐ │ column ┆ column_0 ┆ column_1 ┆ column_2 │ │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ f64 ┆ f64 ┆ f64 │ ╞════════╪══════════╪══════════╪══════════╡ │ a ┆ 1.0 ┆ 2.0 ┆ 3.0 │ │ b ┆ 4.0 ┆ 5.0 ┆ 6.0 │ └────────┴──────────┴──────────┴──────────┘ * // replace the auto generated column names with a list * > df.transpose({includeHeader:false, columnNames:["a", "b", "c"]}) shape: (2, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ │ --- ┆ --- ┆ --- │ │ f64 ┆ f64 ┆ f64 │ ╞═════╪═════╪═════╡ │ 1.0 ┆ 2.0 ┆ 3.0 │ │ 4.0 ┆ 5.0 ┆ 6.0 │ └─────┴─────┴─────┘ * // Include the header as a separate column * > df.transpose({ includeHeader:true, headerName:"foo", columnNames:["a", "b", "c"] }); shape: (2, 4) ┌─────┬─────┬─────┬─────┐ │ foo ┆ a ┆ b ┆ c │ │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ f64 ┆ f64 ┆ f64 │ ╞═════╪═════╪═════╪═════╡ │ a ┆ 1.0 ┆ 2.0 ┆ 3.0 │ │ b ┆ 4.0 ┆ 5.0 ┆ 6.0 │ └─────┴─────┴─────┴─────┘ * // Replace the auto generated column with column names from a generator function function* namesGenerator() { const baseName = "my_column_"; let count = 0; while (true) { const 
name = `${baseName}${count}`; yield name; count++; } } * > df.transpose({includeHeader:false, columnNames:namesGenerator()}); shape: (2, 3) ┌─────────────┬─────────────┬─────────────┐ │ my_column_0 ┆ my_column_1 ┆ my_column_2 │ │ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str │ ╞═════════════╪═════════════╪═════════════╡ │ 1.0 ┆ 2.0 ┆ 3.0 │ │ 4.0 ┆ 5.0 ┆ 6.0 │ └─────────────┴─────────────┴─────────────┘ * > df = pl.DataFrame({id: ["i", "j", "k"], a: [1, 2, 3], b: [4, 5, 6]}); * > df.transpose( { columnNames: "id" }); shape: (2, 3) ┌─────┬─────┬─────┐ │ i ┆ j ┆ k │ │ --- ┆ --- ┆ --- │ │ f64 ┆ f64 ┆ f64 │ ╞═════╪═════╪═════╡ │ 1.0 ┆ 2.0 ┆ 3.0 │ │ 4.0 ┆ 5.0 ┆ 6.0 │ └─────┴─────┴─────┘ * > df.transpose( { includeHeader: true, headerName: "new_id", columnNames: "id" }); shape: (2, 4) ┌────────┬─────┬─────┬─────┐ │ new_id ┆ i ┆ j ┆ k │ │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ f64 ┆ f64 ┆ f64 │ ╞════════╪═════╪═════╪═════╡ │ a ┆ 1.0 ┆ 2.0 ┆ 3.0 │ │ b ┆ 4.0 ┆ 5.0 ┆ 6.0 │ └────────┴─────┴─────┴─────┘ */ transpose(options?: { includeHeader?: boolean; headerName?: string; columnNames?: Iterable; }): DataFrame; /** * Drop duplicate rows from this DataFrame. * Note that this fails if there is a column of type `List` in the DataFrame. * @param subset Column name(s), selector(s) to consider when identifying duplicate rows. If set to `None` (default), all columns are considered. * @param keep : 'first', 'last', 'any', 'none' * Which of the duplicate rows to keep. * 'any': Default, does not give any guarantee of which row is kept. This allows more optimizations. * 'none': Don't keep duplicate rows. * 'first': Keep the first unique row. * 'last': Keep the last unique row. * @param maintainOrder Keep the same order as the original DataFrame. This is more expensive to compute. Default: false * @returns DataFrame with unique rows. 
* @example * const df = pl.DataFrame({ foo: [1, 2, 2, 3], bar: [1, 2, 2, 4], ham: ["a", "d", "d", "c"], }); > df.unique(); By default, all columns are considered when determining which rows are unique: shape: (3, 3) ┌─────┬─────┬─────┐ │ foo ┆ bar ┆ ham │ │ --- ┆ --- ┆ --- │ │ f64 ┆ f64 ┆ str │ ╞═════╪═════╪═════╡ │ 3.0 ┆ 4.0 ┆ c │ │ 1.0 ┆ 1.0 ┆ a │ │ 2.0 ┆ 2.0 ┆ d │ └─────┴─────┴─────┘ > df.unique("foo"); shape: (3, 3) ┌─────┬─────┬─────┐ │ foo ┆ bar ┆ ham │ │ --- ┆ --- ┆ --- │ │ f64 ┆ f64 ┆ str │ ╞═════╪═════╪═════╡ │ 3.0 ┆ 4.0 ┆ c │ │ 1.0 ┆ 1.0 ┆ a │ │ 2.0 ┆ 2.0 ┆ d │ └─────┴─────┴─────┘ > df.unique(["foo", "ham"], "first", true); or df.unique({ subset: ["foo", "ham"], keep: "first", maintainOrder: true }); shape: (3, 3) ┌─────┬─────┬─────┐ │ foo ┆ bar ┆ ham │ │ --- ┆ --- ┆ --- │ │ f64 ┆ f64 ┆ str │ ╞═════╪═════╪═════╡ │ 1.0 ┆ 1.0 ┆ a │ │ 2.0 ┆ 2.0 ┆ d │ │ 3.0 ┆ 4.0 ┆ c │ └─────┴─────┴─────┘ */ unique(subset?: ColumnSelection, keep?: "first" | "last" | "any" | "none", maintainOrder?: boolean): DataFrame; unique(opts: { subset?: ColumnSelection; keep?: "first" | "last" | "any" | "none"; maintainOrder?: boolean; }): DataFrame; /** Decompose struct columns into separate columns for each of their fields. The new columns will be inserted into the DataFrame at the location of the struct column. @param columns Name of the struct column(s) that should be unnested. @param separator Rename output column names as combination of the struct column name, name separator and field name. @example ``` > const df = pl.DataFrame({ ... "int": [1, 2], ... "str": ["a", "b"], ... "bool": [true, null], ... "list": [[1, 2], [3]], ... }) ... .toStruct("my_struct") ... 
.toFrame(); > df shape: (2, 1) ┌─────────────────────────────┐ │ my_struct │ │ --- │ │ struct[4]{'int',...,'list'} │ ╞═════════════════════════════╡ │ {1,"a",true,[1, 2]} │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ {2,"b",null,[3]} │ └─────────────────────────────┘ > df.unnest("my_struct") shape: (2, 4) ┌─────┬─────┬──────┬────────────┐ │ int ┆ str ┆ bool ┆ list │ │ --- ┆ --- ┆ --- ┆ --- │ │ i64 ┆ str ┆ bool ┆ list [i64] │ ╞═════╪═════╪══════╪════════════╡ │ 1 ┆ a ┆ true ┆ [1, 2] │ ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ │ 2 ┆ b ┆ null ┆ [3] │ └─────┴─────┴──────┴────────────┘ ``` */ unnest(columns: string | string[], separator?: string): DataFrame; /** * Aggregate the columns of this DataFrame to their variance value. * @example * ``` * > const df = pl.DataFrame({ * > "foo": [1, 2, 3], * > "bar": [6, 7, 8], * > "ham": ['a', 'b', 'c'] * > }); * > df.var() * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ * │ f64 ┆ f64 ┆ str │ * ╞═════╪═════╪══════╡ * │ 1 ┆ 1 ┆ null │ * ╰─────┴─────┴──────╯ * ``` */ var(): DataFrame; /** Grow this DataFrame vertically by stacking a DataFrame to it. @param df - DataFrame to stack. @example ``` > const df1 = pl.DataFrame({"foo": [1, 2], "bar": [6, 7], "ham": ['a', 'b'] }); > const df2 = pl.DataFrame({"foo": [3, 4], "bar": [8, 9], "ham": ['c', 'd'] }); > df1.vstack(df2); shape: (4, 3) ┌─────┬─────┬─────┐ │ foo ┆ bar ┆ ham │ │ --- ┆ --- ┆ --- │ │ i64 ┆ i64 ┆ str │ ╞═════╪═════╪═════╡ │ 1 ┆ 6 ┆ a │ │ 2 ┆ 7 ┆ b │ │ 3 ┆ 8 ┆ c │ │ 4 ┆ 9 ┆ d │ └─────┴─────┴─────┘ ``` */ vstack(df: DataFrame): DataFrame; /** * Return a new DataFrame with the column added or replaced. * @param column - Series, where the name of the Series refers to the column in the DataFrame. */ withColumn(column: Series): DataFrame>; withColumn(column: Series | Expr): DataFrame; withColumns(...columns: (Expr | Series)[]): DataFrame; /** * Return a new DataFrame with the column renamed. 
* @param existingName * @param replacement */ withColumnRenamed(existingName: Existing, replacement: New): DataFrame<{ [K in keyof S as K extends Existing ? New : K]: S[K]; }>; withColumnRenamed(existing: string, replacement: string): DataFrame; withColumnRenamed(opts: { existingName: Existing; replacement: New; }): DataFrame<{ [K in keyof S as K extends Existing ? New : K]: S[K]; }>; withColumnRenamed(opts: { existing: string; replacement: string; }): DataFrame; /** * Add a column at index 0 that counts the rows. * @param name - name of the column to add * @deprecated - *since 0.23.0 use withRowIndex instead */ withRowCount(name?: string): DataFrame; /** * Add a row index as the first column in the DataFrame. * @param name Name of the index column. * @param offset Start the index at this offset. Cannot be negative. * @example * * >>> const df = pl.DataFrame( ... { ... "a": [1, 3, 5], ... "b": [2, 4, 6], ... } ... ) >>> df.withRowIndex() shape: (3, 3) ┌───────┬─────┬─────┐ │ index ┆ a ┆ b │ │ --- ┆ --- ┆ --- │ │ u32 ┆ i64 ┆ i64 │ ╞═══════╪═════╪═════╡ │ 0 ┆ 1 ┆ 2 │ │ 1 ┆ 3 ┆ 4 │ │ 2 ┆ 5 ┆ 6 │ └───────┴─────┴─────┘ >>> df.withRowIndex("id", 1000) shape: (3, 3) ┌──────┬─────┬─────┐ │ id ┆ a ┆ b │ │ --- ┆ --- ┆ --- │ │ u32 ┆ i64 ┆ i64 │ ╞══════╪═════╪═════╡ │ 1000 ┆ 1 ┆ 2 │ │ 1001 ┆ 3 ┆ 4 │ │ 1002 ┆ 5 ┆ 6 │ └──────┴─────┴─────┘ */ withRowIndex(name?: string, offset?: number): DataFrame; /** @see {@link filter} */ where(predicate: any): DataFrame; /** Upsample a DataFrame at a regular frequency. 
The `every` and `offset` arguments are created with the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) Or combine them: - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". Parameters ---------- @param timeColumn Time column will be used to determine a date range. Note that this column has to be sorted for the output to make sense. @param every Interval will start 'every' duration. @param by First group by these columns and then upsample for every group. @param maintainOrder Keep the ordering predictable. This is slower. Returns ------- DataFrame Result will be sorted by `timeColumn` (but note that if `by` columns are passed, it will only be sorted within each `by` group). Examples -------- Upsample a DataFrame by a certain interval. 
>>> const df = pl.DataFrame({ "date": [ new Date(2024, 1, 1), new Date(2024, 3, 1), new Date(2024, 4, 1), new Date(2024, 5, 1), ], "groups": ["A", "B", "A", "B"], "values": [0, 1, 2, 3], }) .withColumn(pl.col("date").cast(pl.Date).alias("date")) .sort("date"); >>> df.upsample({timeColumn: "date", every: "1mo", by: "groups", maintainOrder: true}) .select(pl.col("*").forwardFill()); shape: (7, 3) ┌────────────┬────────┬────────┐ │ date ┆ groups ┆ values │ │ --- ┆ --- ┆ --- │ │ date ┆ str ┆ f64 │ ╞════════════╪════════╪════════╡ │ 2024-02-01 ┆ A ┆ 0.0 │ │ 2024-03-01 ┆ A ┆ 0.0 │ │ 2024-04-01 ┆ A ┆ 0.0 │ │ 2024-05-01 ┆ A ┆ 2.0 │ │ 2024-04-01 ┆ B ┆ 1.0 │ │ 2024-05-01 ┆ B ┆ 1.0 │ │ 2024-06-01 ┆ B ┆ 3.0 │ └────────────┴────────┴────────┘ */ upsample(timeColumn: string, every: string, by?: string | string[], maintainOrder?: boolean): DataFrame; upsample(opts: { timeColumn: string; every: string; by?: string | string[]; maintainOrder?: boolean; }): DataFrame; } type DataResourceField = { name: string; type: string; }; /** * Tabular Data Resource from https://specs.frictionlessdata.io/schemas/tabular-data-resource.json, */ type TabularDataResource = { data: any[]; schema: { fields: DataResourceField[]; }; }; /** @ignore */ export declare const _DataFrame: (_df: any) => DataFrame; interface DataFrameOptions = any> { columns?: any[]; orient?: "row" | "col"; schema?: S; schemaOverrides?: O; inferSchemaLength?: number; } /** * DataFrame constructor */ export interface DataFrameConstructor extends Deserialize { /** * Create an empty DataFrame */ (): DataFrame; /** * Create a DataFrame from a JavaScript object * * @param data - object or array of data * @param options - options * @param options.columns - column names * @param options.orient - orientation of the data [row, col] * Whether to interpret two-dimensional data as columns or as rows. If None, the orientation is inferred by matching the columns and data dimensions. 
If this does not yield conclusive results, column orientation is used. * @param options.schema - The schema of the resulting DataFrame. The schema may be declared in several ways: * - As a dict of {name:type} pairs; if type is None, it will be auto-inferred. * - As a list of column names; in this case types are automatically inferred. * - As a list of (name,type) pairs; this is equivalent to the dictionary form. * If you supply a list of column names that does not match the names in the underlying data, the names given here will overwrite them. The number of names given in the schema should match the underlying data dimensions. * * If set to null (default), the schema is inferred from the data. * @param options.schemaOverrides - Support type specification or override of one or more columns; note that any dtypes inferred from the schema param will be overridden. * * @param options.inferSchemaLength - The maximum number of rows to scan for schema inference. If set to None, the full data may be scanned (this can be slow). This parameter only applies if the input data is a sequence or generator of rows; other input is read as-is. * The number of entries in the schema should match the underlying data dimensions, unless a sequence of dictionaries is being passed, in which case a partial schema can be declared to prevent specific fields from being loaded. * * @example * ``` * > pl.DataFrame({ a: [1, 2, 3], b: ["a", "b", "c"] }); shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ │ f64 ┆ str │ ╞═════╪═════╡ │ 1.0 ┆ a │ │ 2.0 ┆ b │ │ 3.0 ┆ c │ └─────┴─────┘ * To specify a more detailed/specific frame schema you can supply the `schema` parameter with a dictionary of (name,dtype) pairs... 
* > const data = {col1: [0, 2], col2: [3, 7]} * > pl.DataFrame(data, { schema: { "col1": pl.Float32, "col2": pl.Int64}} ); shape: (2, 2) ┌──────┬──────┐ │ col1 ┆ col2 │ │ --- ┆ --- │ │ f32 ┆ i64 │ ╞══════╪══════╡ │ 0.0 ┆ 3 │ │ 2.0 ┆ 7 │ └──────┴──────┘ * Constructing a DataFrame from a list of lists, row orientation and columns specified * > const data = [[1, 2, 3], [4, 5, 6]]; * > pl.DataFrame(data, { columns: ["a", "b", "c"], orient: "row" }); shape: (2, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ │ --- ┆ --- ┆ --- │ │ f64 ┆ f64 ┆ f64 │ ╞═════╪═════╪═════╡ │ 1.0 ┆ 2.0 ┆ 3.0 │ │ 4.0 ┆ 5.0 ┆ 6.0 │ └─────┴─────┴─────┘ * Constructing an empty DataFrame with a schema * > const schema = { s: pl.String, b: pl.Bool, i: pl.Int32, d: pl.Datetime("ms"), a: pl.Struct([ new pl.Field("b", pl.Bool), new pl.Field("bb", pl.Bool), new pl.Field("s", pl.String), new pl.Field("x", pl.Float64), ]), }; * > pl.DataFrame({}, { schema }) or pl.DataFrame(null, { schema }) or pl.DataFrame(undefined, { schema }); shape: (0, 5) ┌─────┬──────┬─────┬──────────────┬───────────┐ │ s ┆ b ┆ i ┆ d ┆ a │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ bool ┆ i32 ┆ datetime[ms] ┆ struct[0] │ ╞═════╪══════╪═════╪══════════════╪═══════════╡ └─────┴──────┴─────┴──────────────┴───────────┘ * ``` */ >(data: T1, options?: DataFrameOptions): DataFrame<{ [K in T1[number] as K["name"]]: K["dtype"]; }>; > = any, S extends Simplify> = Simplify>>(data: RecordInput, options?: DataFrameOptions): DataFrame; (data: any, options?: DataFrameOptions): DataFrame; isDataFrame(arg: any): arg is DataFrame; } export declare const DataFrame: DataFrameConstructor; export {};