/*! * Copyright (c) Microsoft. All rights reserved. * Licensed under the MIT license. See LICENSE file in the project. */ import type { DataFormat } from "../data.js" /** * Configuration values for interpreting data types when parsing a delimited file. * By default, all values are read as strings - applying these type hints can derive primitive types from the strings. */ export interface TypeHints { /** * The data format */ dataFormat?: DataFormat /** * The character to use for delimiting arrays. */ arrayDelimiter?: string /** * Default: case-insensitive word "true". */ trueValues?: string[] /** * Default: case-insensitive word "false". */ falseValues?: string[] /** * Strings to consider NaN or null. * Default: * ['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', '', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''] */ naValues?: string[] /** * For large numbers that have been written with a thousands separator, you can set the thousands keyword to a string of length 1 so that integers will be parsed correctly. * By default, numbers with a thousands separator will be parsed as strings. * Default: none. */ thousands?: string /** * Character to use when parsing decimal numbers. * Default: . */ decimal?: string /** * Strings to parse as negative and positive infinity. * Default: case insensitive ["-inf", "inf"]. */ infinity?: [string, string] /** * Default date format to use when parsing dates. The Codebook can override this at the column level. * Default: yyyy-MM-dd * TODO: spark has a separate config for datetime. Do we care? */ dateFormat?: string }