/*!
 * Copyright (c) Microsoft. All rights reserved.
 * Licensed under the MIT license. See LICENSE file in the project.
 */
import type { DataFormat } from "../data.js";

/**
 * Hints that control how raw string values from a delimited file are
 * interpreted as data types during parsing.
 *
 * Every value is read as a string unless told otherwise; supplying these
 * hints allows primitive types to be derived from those strings.
 */
export interface TypeHints {
	/**
	 * The data format.
	 */
	dataFormat?: DataFormat;

	/**
	 * Character that delimits the elements of an array value.
	 */
	arrayDelimiter?: string;

	/**
	 * Strings to treat as boolean true.
	 *
	 * Default: the word "true", matched case-insensitively.
	 */
	trueValues?: string[];

	/**
	 * Strings to treat as boolean false.
	 *
	 * Default: the word "false", matched case-insensitively.
	 */
	falseValues?: string[];

	/**
	 * Strings that should be considered NaN or null.
	 *
	 * Default:
	 * ['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', '', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan']
	 */
	naValues?: string[];

	/**
	 * Thousands separator for large numbers. Set this to a string of length 1
	 * so that integers written with that separator are parsed correctly.
	 * When it is not set, numbers containing a thousands separator are parsed
	 * as strings.
	 *
	 * Default: none.
	 */
	thousands?: string;

	/**
	 * Character used as the decimal point when parsing decimal numbers.
	 *
	 * Default: .
	 */
	decimal?: string;

	/**
	 * Pair of strings to parse as negative and positive infinity, in that order.
	 *
	 * Default: case insensitive ["-inf", "inf"].
	 */
	infinity?: [string, string];

	/**
	 * Default date format applied when parsing dates. The Codebook can
	 * override this at the column level.
	 *
	 * Default: yyyy-MM-dd
	 *
	 * TODO: spark has a separate config for datetime. Do we care?
	 */
	dateFormat?: string;
}