/**
* @copyright Copyright (c) 2015 All Rights Reserved.
* @author Baris Yuksel <baris@onehundredyearsofcode.com>
*
* @file Module which exports private object with static methods.
*/
// Export the `private` function object itself as the module. The helper
// functions defined further down in this file are attached to it as static
// properties, so consumers call them directly on the required module.
exports = module.exports = private;
/**
 * Holder for the static helper methods of this module.
 *
 * The constructor body is empty; in the code visible in this file the function
 * is never instantiated and only serves as the object the helpers hang off.
 *
 * NOTE: `private` is a reserved word in strict-mode code, so this file only
 * parses as non-strict (sloppy-mode) script code. Adding 'use strict' or
 * converting the file to an ES module requires renaming this identifier.
 * @constructor
 */
function private() {}
/**
* Double-quotes the fields according the rules on Wikipedia as of 6/2015.
* ({@link https://en.wikipedia.org/wiki/Comma-separated_values})
*
* * Fields with embedded commas or double-quote characters must be quoted.
* * Each of the embedded double-quote characters must be represented by a pair of double-quote characters.
* * Fields with embedded line breaks must be quoted.
* * (Python addition) Python csv parser double-quotes `\r` always.
* @param {string} input - Input to double quote if necessary.
*/
private.doubleQuoteIfNecessary = function(input) {
var shouldDoubleQuote = false;
var output = '';
for (var i = 0; i < input.length; i++) {
if (input[i] === ',' || // comma
input[i] === '"' || // double-quote
input[i] === '\n' || // newline
input[i] === '\r') {
shouldDoubleQuote = true;
}
output += input[i];
if (input[i] === '"') output += '"'; // double-quote double-quote
}
if (shouldDoubleQuote) {
output = '"' + output + '"';
}
return output;
};
/**
* Get the argument from the argument dictionary, with a default value.
* Default value (defval) is returned if the argument is not in the argument:value
* dictionary, or, if the argument's type is not the same as default value.
* @param {dictionary} argDic A dictionary of the type argName: value
* @param {string} argName The name of the argument to be found in dictionary
* @param {*} defVal If it cannot find this argName in argDic, it returns defVal.
*/
private.getArg = function(argDic, argName, defVal) {
return (typeof argDic !== 'undefined' &&
typeof argDic[argName] !== 'undefined' &&
typeof argDic[argName] === typeof defVal) ? argDic[argName] : defVal;
};
/**
* Given a csv string, it parses it into double array.
* Delimiter can be changed with argdic's delim parameter.
*
* Follows the csv format described in {@link http://tools.ietf.org/html/rfc4180}
* as in: *"Fields containing line breaks (CRLF), double quotes, and commas
* should be enclosed in double-quotes."*
* @param {dictionary} argdic - A dictionary of possible parameters.
* @param {string} str - csv string to be parsed.
* Recognized values are:
* * `hasHeaders`: boolean - Whether str has header column. *[default: false]*
* * `delim`: char - Single delimiter char *[default: ',']*
* * `hasComments`: boolean - Whether str has comments. If true, any row which
* starts with `#` will be skipped. *[default: false]*
*/
private.parseStringToArray = function(str, argdic) {
var delim = this.getArg(argdic, 'delim', ',');
var hasComments = this.getArg(argdic, 'hasComments', false);
// Define a function that returns true if the char is a terminator
// Terminator chars are: delim char, \n
function isTerminator(myChar) {
if ( '\n' === myChar ||
delim === myChar) {
return true;
}
return false;
}
var noCharSinceRowPush = true;
var allRows = [];
var currentRow = [];
var currentCell = '';
var j = 0;
while (j < str.length) {
if (hasComments && noCharSinceRowPush && str[j] === '#') {
// The first char of the row is comment char
while (j < str.length && str[j] !== '\n') {
j++;
}
} else {
// Special situation: skip \r, if it is part of \r\n
if (str[j] === '\r' &&
j + 1 < str.length &&
str[j+1] === '\n' ) {
j++;
}
// Look at double-quotes
if (str[j] === '"') {
noCharSinceRowPush = false;
// This is a double-quoted cell, let's retrieve the whole cell.
var quoteCount = 0;
var endOfWord = j + 1;
while (endOfWord < str.length) {
if (str[endOfWord] === '"') {
// Found quote. Are we at the end of a cell?
if ((endOfWord + 1) === str.length ||
(isTerminator(str[endOfWord + 1]) && (quoteCount % 2) === 0)) {
// Yes, this is the end of the cell.
break;
} else {
// No, this is just another quote.
quoteCount++;
}
} else if ((endOfWord + 1) === str.length) {
// This should never happen, it means the cell was not quoted correctly.
// But still, let's save the situation by moving endOfWord by one so that
// we include the last character in the cell's value.
endOfWord++;
break;
}
endOfWord++;
}
currentCell = str.substring(j + 1, endOfWord);
currentCell = currentCell.replace(/""/g, '"');
j = endOfWord ;
} else if (isTerminator(str[j])) {
if (str[j] !== '\n') {
// Special case for empty rows which start with \n
// essentially, if the terminator is not \n, then
// we assume we had chars since row push.
noCharSinceRowPush = false;
}
currentRow.push(currentCell);
currentCell = '';
if (str[j] === '\n') {
// Special case for empty rows which start with \n.
// Python csv parser parses them as empty rows instead of
// rows having one empty string.
if (noCharSinceRowPush) {
currentRow = [];
}
// End of row
allRows.push(currentRow);
currentRow = [];
noCharSinceRowPush = true;
}
} else {
noCharSinceRowPush = false;
currentCell += str[j];
}
}
j++;
}
if (!noCharSinceRowPush) {
// If we have seen some chars after last row push
// Then, push this cell again
currentRow.push(currentCell);
allRows.push(currentRow);
}
return allRows;
};