import { MemoryResource } from '@rapidsai/rmm';
import { PadSideType } from '../column';
import { Series } from '../series';
import {
  Bool8,
  Float32,
  Float64,
  Int16,
  Int32,
  Int64,
  Int8,
  Integral,
  TimestampDay,
  TimestampMicrosecond,
  TimestampMillisecond,
  TimestampNanosecond,
  TimestampSecond,
  Uint16,
  Uint32,
  Uint64,
  Uint8,
  Utf8String
} from '../types/dtypes';
import { GetJSONObjectOptions } from '../types/json';

/** Options accepted by `StringSeries.concatenate`. */
export declare type ConcatenateOptions = {
  /** String that should be inserted between each string from each row. Default is an empty string. */
  separator?: string;
  /**
   * String that should be used in place of any null strings found in any column. By default, a
   * null entry in any column produces a null result for that row.
   */
  nullRepr?: string;
  /** If true, then the separator is included for null rows if nullRepr is valid. Default is true. */
  separatorOnNulls?: boolean;
  /** Device memory resource used to allocate the returned column's device memory. */
  memoryResource?: MemoryResource;
};

/**
 * A Series of utf8-string values in GPU memory.
 */
export declare class StringSeries extends Series {
  /**
   * Row-wise concatenates the given list of strings series and returns a single string series
   * result.
   *
   * @param series List of string series to concatenate.
   * @param opts Options for the concatenation
   * @returns New series with concatenated results.
   *
   * @example
   * ```typescript
   * import {StringSeries} from '@rapidsai/cudf';
   * const s = StringSeries.new(['a', 'b', null])
   * const t = StringSeries.new(['foo', null, 'bar'])
   * [...StringSeries.concatenate([s, t])] // ["afoo", null, null]
   * ```
   */
  static concatenate(series: StringSeries[], opts?: ConcatenateOptions): Series;

  /** @ignore */
  _castAsString(_memoryResource?: MemoryResource): StringSeries;
  /** @ignore */
  _castAsInt8(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsInt16(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsInt32(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsInt64(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsUint8(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsUint16(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsUint32(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsUint64(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsFloat32(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsFloat64(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsTimeStampDay(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsTimeStampSecond(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsTimeStampMillisecond(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsTimeStampMicrosecond(memoryResource?: MemoryResource): Series;
  /** @ignore */
  _castAsTimeStampNanosecond(memoryResource?: MemoryResource): Series;

  /**
   * Return a value at the specified index to host memory
   *
   * @param index the index in this Series to return a value for
   *
   * @example
   * ```typescript
   * import {Series} from "@rapidsai/cudf";
   *
   * // StringSeries
   * Series.new(["foo", "bar", "test"]).getValue(0) // "foo"
   * Series.new(["foo", "bar", "test"]).getValue(2) // "test"
   * Series.new(["foo", "bar", "test"]).getValue(3) // throws index out of bounds error
   * ```
   */
  getValue(index: number): string | null;

  /**
   * Set the value at the specified index
   *
   * @param index the index in this Series to set a value for
   * @param value the value to set at `index`
   *
   * @example
   * ```typescript
   * import {Series} from "@rapidsai/cudf";
   *
   * // StringSeries
   * const a = Series.new(["foo", "bar", "test"])
   * a.setValue(2, "test1") // inplace update -> Series(["foo", "bar", "test1"])
   * ```
   */
  setValue(index: number, value: string): void;

  /**
   * Series of integer offsets for each string
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(["foo", "bar"]);
   *
   * a.offsets // Int32Array(3) [ 0, 3, 6 ]
   * ```
   */
  get offsets(): import("./integral").Int32Series;

  /**
   * Series containing the utf8 characters of each string
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(["foo", "bar"]);
   *
   * a.data // Uint8Array(6) [ 102, 111, 111, 98, 97, 114 ]
   * ```
   */
  get data(): import("./integral").Uint8Series;

  /**
   * Returns a boolean series identifying rows which match the given regex pattern.
   *
   * @param pattern Regex pattern to match to each string.
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * The regex pattern strings accepted are described here:
   *
   * https://docs.rapids.ai/api/libcudf/stable/md_regex.html
   *
   * A RegExp may also be passed, however all flags are ignored (only `pattern.source` is used)
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['Finland','Colombia','Florida', 'Russia','france']);
   *
   * // items starting with F (only upper case)
   * a.containsRe(/^F/) // [true, false, true, false, false]
   * // items starting with F or f
   * a.containsRe(/^[Ff]/) // [true, false, true, false, true]
   * // items ending with a
   * a.containsRe("a$") // [false, true, true, true, false]
   * // items containing "us"
   * a.containsRe("us") // [false, false, false, true, false]
   * ```
   */
  containsRe(pattern: string | RegExp, memoryResource?: MemoryResource): Series;

  /**
   * Returns an Int32 series containing the number of bytes of each string in the Series.
   *
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['Hello', 'Bye', 'Thanks 😊', null]);
   *
   * a.byteCount() // [5, 3, 11, null]
   * ```
   */
  byteCount(memoryResource?: MemoryResource): Series;

  /**
   * Returns an Int32 series containing the number of times the given regex pattern matches
   * in each string.
   *
   * @param pattern Regex pattern to match to each string.
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * The regex pattern strings accepted are described here:
   *
   * https://docs.rapids.ai/api/libcudf/stable/md_regex.html
   *
   * A RegExp may also be passed, however all flags are ignored (only `pattern.source` is used)
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['Finland','Colombia','Florida', 'Russia','france']);
   *
   * // count occurrences of "o"
   * a.countRe(/o/) // [0, 2, 1, 0, 0]
   * // count occurrences of "an"
   * a.countRe('an') // [1, 0, 0, 0, 1]
   *
   * // get number of countries starting with F or f
   * a.countRe(/^[fF]/).count() // 3
   * ```
   */
  countRe(pattern: string | RegExp, memoryResource?: MemoryResource): Series;

  /**
   * Returns a boolean column identifying strings in which all characters are valid for conversion
   * to integers from hex.
   *
   * The output row entry will be set to true if the corresponding string element is non-empty and
   * contains only characters in [0-9A-Fa-f]. Also, the string may start with '0x'.
   *
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['123', '-456', '', 'AGE', '0x9EF']);
   *
   * a.isHex() // [true, false, false, false, true]
   * ```
   */
  isHex(memoryResource?: MemoryResource): Series;

  /**
   * Returns a new integer numeric series parsing hexadecimal values.
   *
   * Any null entries will result in corresponding null entries in the output series.
   *
   * Only characters [0-9] and [A-F] are recognized. When any other character is encountered,
   * the parsing ends for that string. No interpretation is made on the sign of the integer.
   *
   * Overflow of the resulting integer type is not checked. Each string is converted using an
   * int64 type and then cast to the target integer type before storing it into the output series.
   * If the resulting integer type is too small to hold the value, the stored value will be
   * undefined.
   *
   * @param dataType Type of integer numeric series to return. Must be one of the `Integral`
   *   dtypes.
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Int32, Series} from '@rapidsai/cudf';
   * const a = Series.new(['04D2', 'FFFFFFFF', '00', '1B', '146D7719', null]);
   *
   * a.hexToIntegers(new Int32) // [1234, -1, 0, 27, 342718233, null]
   * ```
   */
  hexToIntegers<R extends Integral>(dataType: R, memoryResource?: MemoryResource): Series;

  /**
   * Returns a boolean column identifying strings in which all characters are valid for conversion
   * to integers from IPv4 format.
   *
   * The output row entry will be set to true if the corresponding string element has the following
   * format xxx.xxx.xxx.xxx where xxx is integer digits between 0-255.
   *
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['123.255.0.7', '127.0.0.1', '', '1.2.34', '123.456.789.10', null]);
   *
   * a.isIpv4() // [true, true, false, false, false, null]
   * ```
   */
  isIpv4(memoryResource?: MemoryResource): Series;

  /**
   * Converts IPv4 addresses into integers.
   *
   * The IPv4 format is 1-3 character digits [0-9] between 3 dots (e.g. 123.45.67.89). Each section
   * can have a value between [0-255].
   *
   * The four sets of digits are converted to integers and placed in 8-bit fields inside the
   * resulting integer.
   *
   *   i0.i1.i2.i3 -> (i0 << 24) | (i1 << 16) | (i2 << 8) | (i3)
   *
   * No checking is done on the format. If a string is not in IPv4 format, the resulting integer is
   * undefined.
   *
   * The resulting 32-bit integer is placed in an int64_t to avoid setting the sign-bit in an
   * int32_t type. This could be changed if cudf supported a UINT32 type in the future.
   *
   * Any null entries will result in corresponding null entries in the output series.
   *
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   * @returns A new Int64 numeric series parsed from the IPv4 strings in this series.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['123.255.0.7', '127.0.0.1', null]);
   *
   * a.ipv4ToIntegers() // [2080309255n, 2130706433n, null]
   * ```
   */
  ipv4ToIntegers(memoryResource?: MemoryResource): Series;

  /**
   * Returns an Int32 series containing the length of each string in the Series.
   *
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['dog', '', '\n', null]);
   *
   * a.len() // [3, 0, 1, null]
   * ```
   */
  len(memoryResource?: MemoryResource): Series;

  /**
   * Returns a boolean series identifying rows which match the given regex pattern
   * only at the beginning of the string
   *
   * @param pattern Regex pattern to match to each string.
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * The regex pattern strings accepted are described here:
   *
   * https://docs.rapids.ai/api/libcudf/stable/md_regex.html
   *
   * A RegExp may also be passed, however all flags are ignored (only `pattern.source` is used)
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['Finland','Colombia','Florida', 'Russia','france']);
   *
   * // start of item contains "C"
   * a.matchesRe(/C/) // [false, true, false, false, false]
   * // start of item contains "us", returns false since none of the items start with "us"
   * a.matchesRe('us') // [false, false, false, false, false]
   * ```
   */
  matchesRe(pattern: string | RegExp, memoryResource?: MemoryResource): Series;

  /**
   * Add padding to each string using a provided character.
   *
   * If the string is already width or more characters, no padding is performed. No strings are
   * truncated.
   *
   * Null string entries result in null entries in the output column.
   *
   * @param width The minimum number of characters for each string.
   * @param side Where to place the padding characters. Default is pad right (left justify).
   * @param fill_char Single UTF-8 character to use for padding. Default is the space character.
   * @param memoryResource The optional MemoryResource used to allocate the result Column's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['aa','bbb','cccc','ddddd', null]);
   *
   * a.pad(4) // ['aa  ','bbb ','cccc','ddddd', null]
   * ```
   */
  pad(width: number, side?: PadSideType, fill_char?: string, memoryResource?: MemoryResource): Series;

  /**
   * Add '0' as padding to the left of each string.
   *
   * If the string is already width or more characters, no padding is performed. No strings are
   * truncated.
   *
   * This is equivalent to `pad(width, 'left', '0')` but is more optimized for this special case.
   *
   * Null string entries result in null entries in the output column.
   *
   * @param width The minimum number of characters for each string.
   * @param memoryResource The optional MemoryResource used to allocate the result Column's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new(['1234','-9876','+0.34','-342567', null]);
   *
   * a.zfill(6) // ['001234','0-9876','0+0.34','-342567', null]
   * ```
   */
  zfill(width: number, memoryResource?: MemoryResource): Series;

  /**
   * For each string in the column, replaces any character sequence matching the given pattern with
   * the provided replacement string.
   *
   * Null string entries will return null output string entries.
   *
   * @param pattern The regular expression pattern to search within each string.
   * @param replacement The string used to replace the matched sequence in each string.
   *   Default is an empty string.
   * @param maxReplaceCount The maximum number of times to replace the matched pattern within each
   *   string. Default replaces every substring that is matched.
   * @param memoryResource The optional MemoryResource used to allocate the result Column's device
   *   memory.
   * @returns New strings column with matching elements replaced.
   */
  replaceRe(pattern: RegExp, replacement?: string, maxReplaceCount?: number, memoryResource?: MemoryResource): Series;

  /**
   * Replaces each string in the column with the provided repl string within the [start,stop)
   * character position range.
   *
   * Null string entries will return null output string entries.
   *
   * Position values are 0-based meaning position 0 is the first character of each string.
   *
   * This function can be used to insert a string into specific position by specifying the same
   * position value for start and stop. The repl string can be appended to each string by specifying
   * -1 for both start and stop.
   *
   * @param repl Replacement string for specified positions found.
   * @param start Start position where repl will be added. 0 is the first character position.
   * @param stop End position (exclusive) to use for replacement. -1 specifies the end of each
   *   string.
   * @param memoryResource The optional MemoryResource used to allocate the result Column's device
   *   memory.
   */
  replaceSlice(repl: string, start: number, stop: number, memoryResource?: MemoryResource): Series;

  /**
   * Splits a StringSeries along the delimiter.
   *
   * @note If delimiter is omitted, the default is ''.
   *
   * @param delimiter Optional delimiter.
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @returns Series with new splits determined by the delimiter.
   */
  split(delimiter?: string, memoryResource?: MemoryResource): Series;

  /**
   * Returns a set of 3 columns by splitting each string using the specified delimiter.
   *
   * The number of rows in the output columns will be the same as the input column. The first column
   * will contain the first tokens of each string as a result of the split. The second column will
   * contain the delimiter. The third column will contain the remaining characters of each string
   * after the delimiter.
   *
   * Any null string entries return corresponding null output columns.
   *
   * @note If delimiter is omitted, the default is ''.
   *
   * @param delimiter UTF-8 encoded string indicating where to split each string. Default of empty
   *   string indicates split on whitespace.
   * @param memoryResource The optional MemoryResource used to allocate the result Column's device
   *   memory.
   * @returns 3 new string columns representing before the delimiter, the delimiter, and after the
   *   delimiter.
   *
   * @example
   * ```typescript
   * import {DataFrame, Series} from '@rapidsai/cudf';
   *
   * const strs = Series.new(["a_b", "c_d"]);
   * const [before, delim, after] = strs.partition('_');
   *
   * new DataFrame({ before, delim, after }).toString();
   * //  before  delim  after
   * //       a      _      b
   * //       c      _      d
   * ```
   */
  partition(delimiter?: string, memoryResource?: MemoryResource): [
    Series,
    Series,
    Series
  ];

  /**
   * Applies a JSONPath(string) where each row in the series is a valid json string. Returns New
   * StringSeries containing the retrieved json object strings
   *
   * @param jsonPath The JSONPath string to be applied to each row of the input column
   * @param options Optional `GetJSONObjectOptions` passed along with the JSONPath query.
   * @param memoryResource The optional MemoryResource used to allocate the result Series's device
   *   memory.
   *
   * @example
   * ```typescript
   * import {Series} from '@rapidsai/cudf';
   * const a = Series.new([
   *   {foo: {bar: "baz"}},
   *   {foo: {baz: "bar"}},
   * ].map(JSON.stringify)); // StringSeries ['{"foo":{"bar":"baz"}}', '{"foo":{"baz":"bar"}}']
   *
   * a.getJSONObject("$.foo") // StringSeries ['{"bar":"baz"}', '{"baz":"bar"}']
   * a.getJSONObject("$.foo.bar") // StringSeries ["baz", null]
   *
   * // parse the resulting strings using JSON.parse
   * [...a.getJSONObject("$.foo").map(JSON.parse)] // object [{ bar: 'baz' }, { baz: 'bar' }]
   * ```
   */
  getJSONObject(jsonPath: string, options?: GetJSONObjectOptions, memoryResource?: MemoryResource): Series;
}
//# sourceMappingURL=string.d.ts.map