Initial commit

This commit is contained in:
Spencer Pincott
2024-07-15 22:20:13 -04:00
commit 97737ca1ae
16618 changed files with 934131 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
var util = require("util");
module.exports = CSVError;
/**
 * Error type used to report CSV parsing problems.
 *
 * @param {string} err     Short error code, e.g. "column_mismatched" or "unclosed_quote".
 * @param {number} index   Line number the error refers to (stored as `line`).
 * @param {string} [extra] Optional snippet of the offending data.
 */
function CSVError(err, index, extra) {
  Error.call(this, "");
  this.err = err;
  this.line = index;
  this.extra = extra;
  this.message = "Error: " + err + ". JSON Line number: " + index + (extra ? " near: " + extra : "");
  this.name = "CSV Error";
  // Error.call(this, ...) builds a separate Error object and does not attach a
  // stack to `this`. Capture one explicitly, after name/message are set.
  if (typeof Error.captureStackTrace === "function") {
    Error.captureStackTrace(this, CSVError);
  }
}
util.inherits(CSVError, Error);

/**
 * Serialise the error as a JSON array `[err, line, extra]`.
 * The result can be turned back into a CSVError with CSVError.fromArray().
 * @return {string}
 */
CSVError.prototype.toString = function() {
  return JSON.stringify([this.err, this.line, this.extra]);
};

/**
 * Build a "column_mismatched" error.
 * @param {number} index   Line number.
 * @param {string} [extra] Optional data snippet.
 * @return {CSVError}
 */
CSVError.column_mismatched = function(index, extra) {
  return new CSVError("column_mismatched", index, extra);
};

/**
 * Build an "unclosed_quote" error.
 * @param {number} index   Line number.
 * @param {string} [extra] Optional data snippet.
 * @return {CSVError}
 */
CSVError.unclosed_quote = function(index, extra) {
  return new CSVError("unclosed_quote", index, extra);
};

/**
 * Rebuild a CSVError from the `[err, line, extra]` array form produced by toString().
 * @param {Array} arr
 * @return {CSVError}
 */
CSVError.fromArray = function(arr) {
  return new CSVError(arr[0], arr[1], arr[2]);
};

View File

@@ -0,0 +1,580 @@
var util = require("util");
var Transform = require("stream").Transform;
var os = require("os");
var stripBom = require('strip-bom');
var eol = os.EOL;
// var Processor = require("./Processor.js");
var defParam = require("./defParam");
var fileline = require("./fileline");
var fileLineToCSVLine = require("./fileLineToCSVLine");
var linesToJson = require("./linesToJson");
var CSVError = require("./CSVError");
var workerMgr = null;
var _ = require('lodash');
var rowSplit = require("./rowSplit");
/**
 * Transform stream that converts CSV input into JSON records.
 *
 * @param {Object} [params]  Conversion parameters; completed with defaults by defParam().
 * @param {Object} [options] Stream options handed to Transform; also kept as this._options
 *                           and exposed to the helpers through this.param._options.
 */
function Converter(params, options) {
Transform.call(this, options);
this._options = options || {};
this.param = defParam(params);
this.param._options = this._options;
// this.resultObject = new Result(this);
// this.pipe(this.resultObject); // it is important to have downstream for a transform otherwise it will stuck
this.started = false;//indicate if parsing has started.
this.recordNum = 0;
this.lineNumber = 0; //file line number
// Unprocessed tail of the previous chunk (a string, or a Buffer when set by _transform).
this._csvLineBuffer = "";
this.lastIndex = 0; // index in result json array
//this._pipe(this.lineParser).pipe(this.processor);
// this.initNoFork();
// A truthy `forked` param is cleared and recorded as workerNum = 2 on the instance.
if (this.param.forked) {
this.param.forked = false;
this.workerNum = 2;
}
this.flushCb = null;
this.processEnd = false;
// Pending worker batches, kept in submission order (see workerProcess).
this.sequenceBuffer = [];
// The _need* flags stay null until resolved on the next tick below.
this._needJson = null;
this._needEmitResult = null;
this._needEmitFinalResult = null;
this._needEmitHeader = null;
this._needEmitJson = null;
this._needPush = null;
this._needEmitCsv = null;
this._csvTransf = null;
this.finalResult = [];
// this.on("data", function() {});
// Both "error" and "end" lead to a single "done" event (see emitDone).
this.on("error", emitDone(this));
this.on("end", emitDone(this));
this.initWorker();
// Resolved one tick later, presumably so listeners attached right after construction are counted.
// Every flag still null is derived from the listeners registered at that point.
process.nextTick(function () {
if (this._needEmitFinalResult === null) {
this._needEmitFinalResult = this.listeners("end_parsed").length > 0;
}
if (this._needEmitResult === null) {
this._needEmitResult = this.listeners("record_parsed").length > 0;
}
if (this._needEmitJson === null) {
this._needEmitJson = this.listeners("json").length > 0;
}
if (this._needEmitHeader === null) {
this._needEmitHeader = this.listeners("header").length > 0;
}
if (this._needEmitCsv === null) {
this._needEmitCsv = this.listeners("csv").length > 0;
}
if (this._needJson === null) {
// JSON objects are needed when any JSON consumer exists: a listener, a transform function or objectMode.
this._needJson = this._needEmitJson || this._needEmitFinalResult || this._needEmitResult || this.transform || this._options.objectMode;
}
if (this._needPush === null) {
this._needPush = this.listeners("data").length > 0 || this.listeners("readable").length > 0;
// this._needPush=false;
}
// Read by linesToJson to decide between full row conversion and returning raw rows.
this.param._needParseJson = this._needJson || this._needPush;
}.bind(this));
return this;
}
util.inherits(Converter, Transform);
/**
 * Create a handler that emits "done" on the converter at most once.
 * The event is emitted on the next tick and carries whatever argument
 * the handler was first invoked with.
 *
 * @param {Object} conv Converter instance (needs `emit`; `_hasDone` is set on it).
 * @return {Function} Handler taking an optional error.
 */
function emitDone(conv) {
  function notifyOnce(err) {
    if (conv._hasDone) {
      return;
    }
    conv._hasDone = true;
    process.nextTick(function () {
      conv.emit('done', err);
    });
  }
  return notifyOnce;
}
/**
 * Copy a string into a new Buffer sized to its byte length.
 * Uses Buffer.allocUnsafe when the runtime provides it, otherwise the legacy constructor.
 *
 * @param {string} str Text to convert.
 * @return {Buffer}
 */
function bufFromString(str) {
  var byteCount = Buffer.byteLength(str);
  var target;
  if (Buffer.allocUnsafe) {
    target = Buffer.allocUnsafe(byteCount);
  } else {
    target = new Buffer(byteCount);
  }
  target.write(str);
  return target;
}
/**
 * Transform hook: takes one raw chunk, decodes it as UTF-8 and passes it to the CSV pipeline.
 *
 * @param {Buffer} data      Raw chunk (read byte by byte below).
 * @param {string} encoding  Not used by this method.
 * @param {Function} cb      Called when the chunk has been handled.
 */
Converter.prototype._transform = function (data, encoding, cb) {
// Prepend whatever was left over from the previous chunk.
data=this.prepareData(data);
var idx =data.length-1;
var left=null;
/**
 * From Keyang:
 * The code below checks whether a single UTF-8 character (which may span several bytes) was split.
 * If it was, the bytes from the two chunks need to be concatenated.
 * See how UTF-8 is encoded to understand the code below.
 * If anyone has a better way to do this, please let me know.
 */
// The last byte has its high bit set, so it is part of a multi-byte character.
if ((data[idx] & 1<<7) !=0){
// Walk back over continuation bytes (bit pattern 10xxxxxx)...
while ((data[idx] & 3<<6) === 128){
idx--;
}
// ...and over the lead byte too, so the whole trailing character is held back.
idx--;
}
if (idx !=data.length-1){
// Cut the held-back bytes off this chunk. They go back into _csvLineBuffer after processing.
left=data.slice(idx+1);
data=data.slice(0,idx+1)
var _cb=cb;
var self=this;
cb=function(){
// Append the held-back bytes to any partial data stored while this chunk was processed.
if (self._csvLineBuffer){
self._csvLineBuffer=Buffer.concat([bufFromString(self._csvLineBuffer,"utf8"),left]);
}else{
self._csvLineBuffer=left;
}
_cb();
}
}
data = data.toString("utf8");
if (this.started === false) {
// First chunk only: strip a BOM and, in toArrayString mode, open the JSON array.
this.started = true;
data = stripBom(data);
if (this.param.toArrayString) {
if (this._needPush) {
this.push("[" + eol, "utf8");
}
}
}
var self = this;
// The raw-data hook may rewrite the text. Empty results skip processing.
this.preProcessRaw(data, function (d) {
if (d && d.length > 0) {
self.processData(d, cb);
} else {
cb();
}
});
};
/**
 * Prepend the buffered remainder of the previous chunk to a new chunk.
 * A string remainder is first converted to a Buffer and stored back on the instance.
 *
 * @param {Buffer} data Incoming chunk.
 * @return {Buffer} The chunk itself when nothing is buffered, otherwise remainder + chunk.
 */
Converter.prototype.prepareData = function (data) {
  var pending = this._csvLineBuffer;
  if (!pending || !(pending.length > 0)) {
    return data;
  }
  if (typeof pending === "string") {
    pending = this._csvLineBuffer = bufFromString(pending);
  }
  return Buffer.concat([pending, data]);
};
/**
 * Store the unprocessed tail of the current chunk so it is prepended to the next one.
 * @param {string} d Partial data to keep.
 */
Converter.prototype.setPartialData = function (d) {
this._csvLineBuffer = d;
};
/**
 * Split decoded CSV text into file lines and convert the complete ones to JSON records.
 *
 * @param {string} data Decoded CSV text.
 * @param {Function} cb Called when the text has been handled; for the header and worker
 *                      paths it is passed on to processHead / workerProcess.
 */
Converter.prototype.processData = function (data, cb) {
var params = this.param;
// Before the header is known, leading whitespace is dropped when ignoreEmpty is set.
if (params.ignoreEmpty && !params._headers) {
data = data.replace(/^\s+/, "");
}
// Remember the eol so a change made inside fileline() can be reported.
var eol = this.param.eol;
var fileLines = fileline(data, this.param);
if (this.param.eol !== eol) {
this.emit("eol", this.param.eol);
}
if (fileLines.lines.length > 0) {
if (this.preProcessLine && typeof this.preProcessLine === "function") {
fileLines.lines = this._preProcessLines(fileLines.lines, this.lastIndex);
}
if (!params._headers) { //header is not inited. init header
this.processHead(fileLines, cb);
} else {
if (params.workerNum <= 1) {
// In-process path: group lines into rows, keep the incomplete tail, convert and emit.
var lines = fileLineToCSVLine(fileLines, params);
this.setPartialData(lines.partial);
var jsonArr = linesToJson(lines.lines, params, this.recordNum);
this.processResult(jsonArr);
this.lastIndex += jsonArr.length;
this.recordNum += jsonArr.length;
cb();
} else {
this.workerProcess(fileLines, cb);
}
}
} else {
// No complete line yet: keep everything for the next chunk.
this.setPartialData(fileLines.partial);
cb();
}
};
/**
 * Run the preProcessLine hook over every file line.
 * A non-string hook result keeps the original line and emits an "error".
 *
 * @param {string[]} lines  File lines.
 * @param {number} startIdx Offset of the first line; the hook receives 1-based numbers after it.
 * @return {string[]} The processed lines.
 */
Converter.prototype._preProcessLines = function (lines, startIdx) {
  var processed = [];
  var total = lines.length;
  for (var pos = 0; pos < total; pos++) {
    var replaced = this.preProcessLine(lines[pos], startIdx + pos + 1);
    if (typeof replaced !== "string") {
      processed.push(lines[pos]);
      this.emit("error", new Error("preProcessLine should return a string but got: " + JSON.stringify(replaced)));
      continue;
    }
    processed.push(replaced);
  }
  return processed;
};
/**
 * Start the worker manager when more than one worker is configured.
 * The manager is created with workerNum - 1 workers and the current params.
 */
Converter.prototype.initWorker = function () {
  var extraWorkers = this.param.workerNum - 1;
  if (!(extraWorkers > 0)) {
    return;
  }
  workerMgr = require("./workerMgr");
  this.workerMgr = workerMgr();
  this.workerMgr.initWorker(extraWorkers, this.param);
};
/**
 * Install the raw-data hook; it replaces preProcessRaw and is called as func(data, cb).
 * @param {Function} func Hook function.
 * @return {Converter} this, for chaining.
 */
Converter.prototype.preRawData = function (func) {
this.preProcessRaw = func;
return this;
};
/**
 * Install the per-line hook; it replaces preProcessLine and is called as func(line, lineNumber).
 * @param {Function} func Hook function; expected to return a string (see _preProcessLines).
 * @return {Converter} this, for chaining.
 */
Converter.prototype.preFileLine = function (func) {
this.preProcessLine = func;
return this;
};
/**
 * Hand a batch of file lines to the worker manager and emit results in submission order.
 * workerProcess does not support rows that span multiple lines (embedded line breaks).
 *
 * @param {Object} fileLine {lines: string[], partial: string} as produced by fileline().
 * @param {Function} cb     Passed to workerMgr.sendWorker.
 */
Converter.prototype.workerProcess = function (fileLine, cb) {
var self = this;
var line = fileLine;
var eol = this.getEol();
this.setPartialData(line.partial);
this.workerMgr.sendWorker(line.lines.join(eol) + eol, this.lastIndex, cb, function (results, lastIndex) {
var buf;
var cur = self.sequenceBuffer[0];
if (cur.idx === lastIndex) {
// The oldest pending batch finished: flush it and every later batch that is already complete.
cur.result = results;
var records = [];
while (self.sequenceBuffer[0] && self.sequenceBuffer[0].result) {
buf = self.sequenceBuffer.shift();
records = records.concat(buf.result);
}
self.processResult(records);
self.recordNum += records.length;
} else {
// A later batch finished first: store its result until the batches before it are done.
for (var i = 0, len = self.sequenceBuffer.length; i < len; i++) {
buf = self.sequenceBuffer[i];
if (buf.idx === lastIndex) {
buf.result = results;
break;
}
}
}
});
// Register the batch under its starting index so results can be matched and ordered.
this.sequenceBuffer.push({
idx: this.lastIndex,
result: null
});
this.lastIndex += line.lines.length;
};
/**
 * Initialise params._headers from the first complete row (or from params.headers),
 * then convert the remaining lines of the chunk.
 *
 * @param {Object} fileLine {lines: string[], partial: string}; header lines are shifted off `lines`.
 * @param {Function} cb     Called when the chunk has been handled.
 */
Converter.prototype.processHead = function (fileLine, cb) {
var params = this.param;
if (params._headers) {
return cb();
}
//dirty hack
// Row filtering is switched off while the header row itself is split.
params._needFilterRow = false;
// if header is not inited. init header
var lines = fileLine.lines;
var left = "";
var headerRow = [];
if (!params.noheader) {
// Consume lines until they form one closed row; an unclosed row is joined with the next line.
while (lines.length) {
var line = left + lines.shift();
var delimiter = params.delimiter;
var row = rowSplit(line, params);
if (params.delimiter !== delimiter) {
this.emit("delimiter", params.delimiter);
}
if (row.closed) {
headerRow = row.cols;
left = "";
break;
} else {
left = line + this.getEol();
}
}
}
params._needFilterRow = true;
if (!params.noheader && headerRow.length === 0) { //if one chunk of data does not complete header row.
this.setPartialData(left);
return cb();
}
// Explicit params.headers take precedence over the parsed header row.
if (params.noheader) {
if (params.headers) {
params._headers = params.headers;
} else {
params._headers = [];
}
} else {
if (params.headers) {
params._headers = params.headers;
} else {
params._headers = headerRow;
}
}
// Turn column names in ignoreColumns/includeColumns into indexes, then filter the header itself.
configIgnoreIncludeColumns(params);
params._headers = require("./filterRow")(params._headers, params);
if (this._needEmitHeader && this.param._headers) {
this.emit("header", this.param._headers);
}
var delimiter = params.delimiter;
var lines = fileLineToCSVLine(fileLine, params);
if (params.delimiter !== delimiter) {
this.emit("delimiter", params.delimiter);
}
this.setPartialData(lines.partial);
if (this.param.workerNum > 1) {
this.workerMgr.setParams(params);
}
// The rest of this first chunk is converted in-process, starting at index 0.
var res = linesToJson(lines.lines, params, 0);
// Put the header with the first row
// if(res.length > 0) res[0].header = params._headers;
this.processResult(res);
this.lastIndex += res.length;
this.recordNum += res.length;
cb();
};
/**
 * Resolve column names in params.ignoreColumns / params.includeColumns to header indexes.
 * Names missing from params._headers become -1. The ignore list is sorted in descending
 * order when it was resolved, and both lists are de-duplicated.
 *
 * @param {Object} params Converter params with _headers populated.
 */
function configIgnoreIncludeColumns(params) {
  // Replace each string entry by its position in params._headers (-1 when absent).
  function namesToIndexes(columns) {
    for (var pos = 0; pos < columns.length; pos++) {
      if (typeof columns[pos] === "string") {
        columns[pos] = params._headers.indexOf(columns[pos]);
      }
    }
  }
  if (params._postIgnoreColumns) {
    namesToIndexes(params.ignoreColumns);
    params.ignoreColumns.sort(function (left, right) { return right - left; });
  }
  if (params._postIncludeColumns) {
    namesToIndexes(params.includeColumns);
  }
  params.ignoreColumns = _.uniq(params.ignoreColumns);
  params.includeColumns = _.uniq(params.includeColumns);
}
/**
 * Dispatch converted rows: entries carrying `err` are emitted as "error",
 * all others are passed to emitResult.
 *
 * @param {Object[]} result Entries produced by linesToJson.
 */
Converter.prototype.processResult = function (result) {
  var total = result.length;
  for (var pos = 0; pos < total; pos++) {
    var entry = result[pos];
    if (!entry.err) {
      this.emitResult(entry);
      continue;
    }
    this.emit("error", entry.err);
  }
};
/**
 * Deliver one converted row to every configured output: the transform function,
 * the "json", "csv" and "record_parsed" events, the final result array and the readable side.
 *
 * @param {Object} r Entry with `json` (object or JSON string), `row` and `index`.
 */
Converter.prototype.emitResult = function (r) {
var index = r.index;
// NOTE(review): `header` is never read in this method.
var header = this.param;
var row = r.row;
var result = r.json;
var resultJson = null;
var resultStr = null;
// The result arrives either as a JSON string or as an object.
if (typeof result === "string") {
resultStr = result;
} else {
resultJson = result;
}
// Parse a string result only when some consumer needs the object form.
if (resultJson === null && this._needJson) {
resultJson = JSON.parse(resultStr);
if (typeof row === "string") {
row = JSON.parse(row);
}
}
if (this.transform && typeof this.transform === "function") {
this.transform(resultJson, row, index);
// The transform may have changed the object, so the string form is dropped and rebuilt on demand.
resultStr = null;
}
if (this._needEmitJson) {
this.emit("json", resultJson, index);
}
if (this._needEmitCsv) {
if (typeof row === "string") {
row = JSON.parse(row);
}
this.emit("csv", row, index);
}
if (this.param.constructResult && this._needEmitFinalResult) {
this.finalResult.push(resultJson);
}
if (this._needEmitResult) {
this.emit("record_parsed", resultJson, row, index);
}
// In toArrayString mode every record after the first is preceded by a separator.
if (this.param.toArrayString && index > 0 && this._needPush) {
this.push("," + eol);
}
if (this._options && this._options.objectMode) {
this.push(resultJson);
} else {
if (this._needPush) {
if (resultStr === null) {
resultStr = JSON.stringify(resultJson);
}
// One record per line, except in toArrayString mode where separators are pushed above.
this.push(!this.param.toArrayString ? resultStr + eol : resultStr, "utf8");
}
}
};
/**
 * Default raw-data hook: passes the data through unchanged.
 * Replaced through preRawData().
 * @param {string} data  Decoded chunk.
 * @param {Function} cb  Receives the (possibly rewritten) data.
 */
Converter.prototype.preProcessRaw = function (data, cb) {
cb(data);
};
/**
 * Default per-line hook: returns the line unchanged.
 * Replaced through preFileLine().
 * FIXME: lineNumber is not used by this default implementation.
 * @param {string} line        File line.
 * @param {number} lineNumber  Number passed by _preProcessLines.
 * @return {string}
 */
Converter.prototype.preProcessLine = function (line, lineNumber) {
return line;
};
/**
 * Transform hook called when the input has ended: processes any buffered remainder,
 * then finishes through checkAndFlush().
 *
 * @param {Function} cb Stream callback, invoked from this.flushCb.
 */
Converter.prototype._flush = function (cb) {
var self = this;
// Final step, run by checkAndFlush once all work (including workers) is finished.
this.flushCb = function () {
self.emit("end_parsed", self.finalResult);
if (self.workerMgr) {
self.workerMgr.destroyWorker();
}
cb();
// "end" is emitted by hand when nothing consumes the readable side.
if (!self._needPush) {
self.emit("end");
}
};
if (this._csvLineBuffer.length > 0) {
var eol = this.getEol();
// Terminate the remainder with an eol so the last line is treated as complete.
// NOTE(review): only the last single element is compared with eol, so this cannot match
// a two-character eol or a Buffer remainder; the eol is then appended anyway.
if (this._csvLineBuffer[this._csvLineBuffer.length - 1] !== eol) {
this._csvLineBuffer += eol;
}
this.processData(this._csvLineBuffer, function () {
this.checkAndFlush();
}.bind(this));
} else {
this.checkAndFlush();
}
return;
};
/**
 * Finish the stream: report leftover data as an unclosed quote, close the JSON array
 * in toArrayString mode, and run flushCb (after the workers drain when they are still running).
 */
Converter.prototype.checkAndFlush = function () {
  var self = this;
  if (self._csvLineBuffer.length !== 0) {
    self.emit("error", CSVError.unclosed_quote(self.recordNum, self._csvLineBuffer), self._csvLineBuffer);
  }
  if (self.param.toArrayString && self._needPush) {
    self.push(eol + "]", "utf8");
  }
  var workersBusy = self.workerMgr && self.workerMgr.isRunning();
  if (!workersBusy) {
    self.flushCb();
    return;
  }
  self.workerMgr.drain = function () {
    self.flushCb();
  };
};
/**
 * Return the end-of-line sequence in use.
 * When none is configured and data is given, the first line break in the data decides
 * ("\r\n", "\r" or "\n"); without any line break the platform EOL is stored.
 *
 * @param {string} [data] Text to inspect.
 * @return {string}
 */
Converter.prototype.getEol = function (data) {
  var param = this.param;
  if (param.eol || !data) {
    return param.eol || eol;
  }
  var detected = eol;
  for (var pos = 0, total = data.length; pos < total; pos++) {
    var ch = data[pos];
    if (ch === "\n") {
      detected = "\n";
      break;
    }
    if (ch === "\r") {
      detected = data[pos + 1] === "\n" ? "\r\n" : "\r";
      break;
    }
  }
  param.eol = detected;
  return detected;
};
/**
 * Read CSV from a file and pipe it into this converter.
 *
 * @param {string} filePath  Path of the CSV file.
 * @param {Function|Object} [cb] Callback (err, result); an object here is taken as `options`
 *                               when no third argument is given.
 * @param {Object} [options] Options for fs.createReadStream.
 * @return {Converter} this, for chaining.
 */
Converter.prototype.fromFile = function (filePath, cb, options) {
  var fs = require('fs');
  var self = this;
  var source = null;
  if (typeof cb === "object" && typeof options === "undefined") {
    options = cb;
    cb = null;
  }
  // On error, release the file stream if it was opened.
  self.wrapCallback(cb, function () {
    if (source && source.destroy) {
      source.destroy();
    }
  });
  fs.exists(filePath, function (found) {
    if (!found) {
      self.emit('error', new Error("File does not exist. Check to make sure the file path to your csv is correct."));
      return;
    }
    source = fs.createReadStream(filePath, options);
    source.pipe(self);
  });
  return self;
};
/**
 * Pipe a readable stream into this converter.
 *
 * @param {Object} readStream Source stream (needs `pipe`).
 * @param {Function} [cb]     Callback (err, result).
 * @return {Converter} this, for chaining.
 */
Converter.prototype.fromStream = function (readStream, cb) {
  var hasCallback = typeof cb === "function";
  if (hasCallback) {
    this.wrapCallback(cb);
  }
  readStream.pipe(this);
  return this;
};
/**
 * Install a transform function; emitResult calls it as func(json, row, index) for every record.
 * @param {Function} func Transform function.
 * @return {Converter} this, for chaining.
 */
Converter.prototype.transf = function (func) {
this.transform = func;
return this;
};
/**
 * Convert a CSV string. The string is written to the stream on the next tick.
 *
 * @param {string} csvString CSV text.
 * @param {Function} [cb]    Callback (err, result). With a callback, non-string input is
 *                           reported through it and the callback's return value is returned.
 * @return {Converter} this, for chaining (except in the case above).
 */
Converter.prototype.fromString = function (csvString, cb) {
  var self = this;
  var hasCallback = typeof cb === "function";
  if (hasCallback && typeof csvString !== "string") {
    return cb(new Error("Passed CSV Data is not a string."));
  }
  if (hasCallback) {
    self.wrapCallback(cb, function () {
    });
  }
  process.nextTick(function () {
    self.end(csvString);
  });
  return self;
};
/**
 * Connect a node-style callback to the converter's events.
 * "end_parsed" calls cb(null, result) unless an error occurred; the first "error"
 * marks the converter as failed, calls cb(err) and then the cleanup function.
 *
 * @param {Function} [cb]    Callback (err, result).
 * @param {Function} [clean] Cleanup run after an error; defaults to a no-op.
 */
Converter.prototype.wrapCallback = function (cb, clean) {
  var self = this;
  var hasCallback = cb && typeof cb === "function";
  var cleanup = clean === undefined ? function () { } : clean;
  if (hasCallback) {
    self.once("end_parsed", function (res) {
      if (!self.hasError) {
        cb(null, res);
      }
    });
  }
  self.once("error", function (err) {
    self.hasError = true;
    if (hasCallback) {
      cb(err);
    }
    cleanup();
  });
};
module.exports = Converter;

View File

@@ -0,0 +1,23 @@
var getEol = require("./getEol");
var rowSplit = require("./rowSplit");
/**
* Convert lines to csv columns
* @param {[type]} lines [file lines]
* @param {[type]} param [Converter param]
* @return {[type]} {lines:[[col1,col2,col3...]],partial:String}
*/
module.exports = function(lines, param) {
var csvLines = [];
var left = "";
while (lines.length) {
var line = left + lines.shift();
var row = rowSplit(line, param);
if (row.closed || param.alwaysSplitAtEOL) {
csvLines.push(row.cols);
left = "";
} else {
left = line + (getEol(line, param) || "\n"); // if unable to getEol from data, assume "\n"
}
}
return {lines: csvLines, partial: left};
};

View File

@@ -0,0 +1,17 @@
var fileline=require("./fileline");
var csvline=require("./csvline");
/**
* Convert data chunk to csv lines with cols
* @param {[type]} data [description]
* @param {[type]} params [description]
* @return {[type]} {lines:[[col1,col2,col3]],partial:String}
*/
module.exports = function(data, params) {
var line = fileline(data, params);
var lines = line.lines;
var csvLines = csvline(lines, params);
return {
lines: csvLines.lines,
partial: csvLines.partial + line.partial
};
};

View File

@@ -0,0 +1,67 @@
var numExp = /^[0-9]+$/;
module.exports = function (params) {
var _param = {
constructResult: true, //set to false to not construct result in memory. suitable for big csv data
delimiter: ',', // change the delimiter of csv columns. It is able to use an array to specify potencial delimiters. e.g. [",","|",";"]
ignoreColumns: [], // columns to ignore upon input.
includeColumns: [], // columns to include upon input.
quote: '"', //quote for a column containing delimiter.
trim: true, //trim column's space charcters
checkType: false, //whether check column type
toArrayString: false, //stream down stringified json array instead of string of json. (useful if downstream is file writer etc)
ignoreEmpty: false, //Ignore empty value while parsing. if a value of the column is empty, it will be skipped parsing.
workerNum: getEnv("CSV_WORKER", 1), //number of parallel workers. If multi-core CPU available, increase the number will get better performance for large csv data.
fork: false, //use another CPU core to convert the csv stream
noheader: false, //indicate if first line of CSV file is header or not.
headers: null, //an array of header strings. If noheader is false and headers is array, csv header will be ignored.
flatKeys: false, // Don't interpret dots and square brackets in header fields as nested object or array identifiers at all.
maxRowLength: 0, //the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit "error" of "row_exceed". if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 0
checkColumn: false, //whether check column number of a row is the same as headers. If column number mismatched headers number, an error of "mismatched_column" will be emitted.. default: false
escape: '"', //escape char for quoted column
colParser:{}, //flags on columns to alter field processing.
/**below are internal params */
_columnConv:[],
_headerType: [],
_headerTitle: [],
_headerFlag: [],
_headers: null,
_needFilterRow: false
};
if (!params) {
params = {};
}
for (var key in params) {
if (params.hasOwnProperty(key)) {
if (Array.isArray(params[key])) {
_param[key] = [].concat(params[key]);
} else {
_param[key] = params[key];
}
}
}
if (_param.ignoreColumns.length > 0 && !numExp.test(_param.ignoreColumns.join(""))) {
_param._postIgnoreColumns = true;
}
if (_param.includeColumns.length > 0 && !numExp.test(_param.includeColumns.join(""))) {
_param._postIncludeColumns = true;
}
if (_param.ignoreColumns.length || _param.includeColumns.length) {
_param._needFilterRow = true;
if (!_param._postIgnoreColumns){
_param.ignoreColumns.sort(function (a, b) { return b-a;});
}
}
return _param;
};
/**
 * Read an environment variable, falling back to a default when it is unset or empty.
 *
 * @param {string} key Environment variable name.
 * @param {*} def      Default value.
 * @return {*} The variable's (string) value, or `def`.
 */
function getEnv(key, def) {
  var value = process.env[key];
  return value ? value : def;
}

View File

@@ -0,0 +1,7 @@
// List of the built-in parser definitions (array, json, omit, jsonarray, flat).
module.exports = [
require('./parser_array.js'),
require('./parser_json.js'),
require('./parser_omit.js'),
require('./parser_jsonarray.js'),
require("./parser_flat.js")
];

View File

@@ -0,0 +1,12 @@
module.exports = {
"name": "array",
"processSafe":true,
"regExp": /^\*array\*/,
"parserFunc": function parser_array(params) {
var fieldName = params.head.replace(this.regExp, '');
if (params.resultRow[fieldName] === undefined) {
params.resultRow[fieldName] = [];
}
params.resultRow[fieldName].push(params.item);
}
};

View File

@@ -0,0 +1,10 @@
module.exports = {
"name": "flat",
"processSafe": true,
"regExp": /^\*flat\*/,
"parserFunc": function parser_flat (params) {
var key = this.getHeadStr();
var val = params.item;
params.resultRow[key] = val;
}
};

View File

@@ -0,0 +1,70 @@
var arrReg = /\[([0-9]*)\]/;
/**
 * Walk (and create) the nested containers named by all but the last path segment.
 * Segments matching arrReg, e.g. "a[0]" or "a[]", address an object inside an array;
 * an empty index appends. `headArr` is consumed until only the last segment remains.
 *
 * @param {Object} pointer    Object to start from.
 * @param {string[]} headArr  Path segments.
 * @param {RegExp} arrReg     Pattern detecting an array index in a segment.
 * @param {boolean} flatKeys  When truthy, segments are never treated as arrays.
 * @return {Object} The container that should receive the last segment.
 */
function processHead(pointer, headArr, arrReg, flatKeys) {
  while (headArr.length > 1) {
    var segment = headArr.shift();
    var arrayMatch = flatKeys ? false : segment.match(arrReg);
    if (!arrayMatch) {
      // Plain object segment: descend, creating the object when missing.
      if (pointer[segment] === undefined) {
        pointer[segment] = {};
      }
      pointer = pointer[segment];
      continue;
    }
    // Array segment: make sure the array exists, then descend into the addressed slot.
    var arrayName = segment.replace(arrayMatch[0], '');
    if (pointer[arrayName] === undefined) {
      pointer[arrayName] = [];
    }
    pointer = pointer[arrayName];
    var slot = arrayMatch[1];
    if (slot === '') {
      slot = pointer.length;
    }
    if (!pointer[slot]) {
      pointer[slot] = {};
    }
    pointer = pointer[slot];
  }
  return pointer;
}
// Parser definition for "*json*" headers: the header is read as a dotted path
// (with optional [index] array markers) and the value is stored at that path.
module.exports = {
"name": "json",
"processSafe": true,
"regExp": /^\*json\*/,
"parserFunc": function parser_json(params) {
var fieldStr = this.getHeadStr();
// With flatKeys the whole header is a single key; otherwise it is split on dots.
var headArr = (params.config && params.config.flatKeys) ? [fieldStr] : fieldStr.split('.');
var match, index, key;
//now the pointer is pointing the position to add a key/value pair.
var pointer = processHead(params.resultRow, headArr, arrReg, params.config && params.config.flatKeys);
// processHead leaves exactly the last segment in headArr.
key = headArr.shift();
match = (params.config && params.config.flatKeys) ? false : key.match(arrReg);
if (match) { // the last element is an array, we need check and treat it as an array.
try {
key = key.replace(match[0], '');
if (!pointer[key] || !(pointer[key] instanceof Array)) {
pointer[key] = [];
}
if (pointer[key]) {
index = match[1];
// An empty index ("[]") appends to the array.
if (index === '') {
index = pointer[key].length;
}
pointer[key][index] = params.item;
} else {
// Reached only if the assignment above did not take effect; presumably a guard
// for a non-object pointer. The value is then stored under the full header string.
params.resultRow[fieldStr] = params.item;
}
} catch (e) {
// Any failure while building the path falls back to the full header string as key.
params.resultRow[fieldStr] = params.item;
}
} else {
// A string pointer cannot hold properties, so fall back to the full header string as key.
if (typeof pointer === "string"){
params.resultRow[fieldStr] = params.item;
}else{
pointer[key] = params.item;
}
}
}
};

View File

@@ -0,0 +1,22 @@
module.exports = {
"name": "jsonarray",
"processSafe":true,
"regExp": /^\*jsonarray\*/,
"parserFunc": function parser_jsonarray (params) {
var fieldStr = params.head.replace(this.regExp, "");
var headArr = fieldStr.split('.');
var pointer = params.resultRow;
while (headArr.length > 1) {
var headStr = headArr.shift();
if (pointer[headStr] === undefined) {
pointer[headStr] = {};
}
pointer = pointer[headStr];
}
var arrFieldName = headArr.shift();
if (pointer[arrFieldName] === undefined) {
pointer[arrFieldName] = [];
}
pointer[arrFieldName].push(params.item);
}
};

View File

@@ -0,0 +1,6 @@
// Parser definition for "*omit*" headers: parserFunc is empty, so it writes nothing to the result row.
module.exports = {
"name": "omit",
"regExp": /^\*omit\*/,
"processSafe":true,
"parserFunc": function parser_omit() {}
};

View File

@@ -0,0 +1,15 @@
var csvline=require("./csvline");
/**
* Convert data chunk to csv lines with cols
* @param {[type]} data [description]
* @param {[type]} params [description]
* @return {[type]} {lines:[[col1,col2,col3]],partial:String}
*/
module.exports = function(fileLine, params) {
var lines = fileLine.lines;
var csvLines = csvline(lines,params);
return {
lines: csvLines.lines,
partial: csvLines.partial + fileLine.partial
};
};

View File

@@ -0,0 +1,13 @@
var getEol = require("./getEol");
/**
* convert data chunk to file lines array
* @param {string} data data chunk as utf8 string
* @param {object} param Converter param object
* @return {Object} {lines:[line1,line2...],partial:String}
*/
module.exports = function(data, param) {
var eol = getEol(data,param);
var lines = data.split(eol);
var partial = lines.pop();
return {lines: lines, partial: partial};
};

View File

@@ -0,0 +1,19 @@
module.exports=function filterRow(row, param) {
if (param.ignoreColumns instanceof Array && param.ignoreColumns.length > 0) {
for (var igRow = 0, igColLen = param.ignoreColumns.length; igRow < igColLen; igRow++) {
if (param.ignoreColumns[igRow] >= 0) {
row.splice(param.ignoreColumns[igRow], 1);
}
}
}
if (param.includeColumns instanceof Array && param.includeColumns.length > 0) {
var cleanRowArr = [];
for (var inRow = 0, inColLen = param.includeColumns.length; inRow < inColLen; inRow++) {
if (param.includeColumns[inRow] >= 0) {
cleanRowArr.push(row[param.includeColumns[inRow]]);
}
}
row = cleanRowArr;
}
return row;
}

View File

@@ -0,0 +1,22 @@
module.exports = getDelimiter;
// Candidates tried when param.delimiter is "auto".
var defaulDelimiters = [",", "|", "\t", ";", ":"];
/**
 * Determine the delimiter for a row.
 * A plain param.delimiter is returned as is. For "auto" or an array of candidates,
 * the candidate that splits the row into the most pieces wins (first one on a tie);
 * "," is returned when there are no candidates.
 *
 * @param {string} rowStr Row text.
 * @param {Object} param  Converter param with `delimiter`.
 * @return {string}
 */
function getDelimiter(rowStr,param) {
  var candidates;
  if (param.delimiter === "auto") {
    candidates = defaulDelimiters;
  } else if (param.delimiter instanceof Array) {
    candidates = param.delimiter;
  } else {
    return param.delimiter;
  }
  var winner = candidates.reduce(function (best, delim) {
    var pieces = rowStr.split(delim).length;
    return pieces > best.count ? {delim: delim, count: pieces} : best;
  }, {delim: ",", count: 0});
  return winner.delim;
}

20
themes/keepit/node_modules/csvtojson/v1/core/getEol.js generated vendored Normal file
View File

@@ -0,0 +1,20 @@
//return eol from a data chunk.
var eol = require("os").EOL;
module.exports = function(data, param) {
if (!param.eol && data) {
for (var i = 0, len = data.length; i < len; i++) {
if (data[i] === "\r") {
if (data[i + 1] === "\n") {
param.eol = "\r\n";
} else if (data[i + 1]) {
param.eol = "\r";
}
return param.eol;
} else if (data[i] === "\n") {
param.eol = "\n";
return param.eol;
}
}
}
return param.eol;
};

View File

@@ -0,0 +1,6 @@
module.exports = constructor;
module.exports.Converter = require("./Converter.js");
/**
 * Factory: create a Converter without using `new` at the call site.
 *
 * @param {Object} [param]   Conversion parameters.
 * @param {Object} [options] Stream options.
 * @return {Converter}
 */
function constructor(param,options) {
  var ConverterClass = module.exports.Converter;
  return new ConverterClass(param, options);
}

View File

@@ -0,0 +1,235 @@
var parserMgr = require("./parserMgr.js");
var CSVError = require("./CSVError");
var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/;
/**
* Convert lines of csv array into json
* @param {[type]} lines [[col1,col2,col3]]
* @param {[type]} params Converter params with _headers field populated
* @param {[type]} idx start pos of the lines
* @return {[type]} [{err:null,json:obj,index:line,row:[csv row]}]
*/
module.exports = function (lines, params, idx) {
if (params._needParseJson) {
if (!params._headers || !Array.isArray(params._headers)) {
params._headers = [];
}
if (!params.parseRules) {
var row = params._headers;
params.parseRules = parserMgr.initParsers(row, params);
}
return processRows(lines, params, idx);
} else {
return justReturnRows(lines, params, idx);
}
};
/**
 * Wrap rows in result entries without converting them: `json` is an empty object.
 *
 * @param {Array[]} lines  CSV rows.
 * @param {Object} params  Not used here.
 * @param {number} idx     Index of the first row.
 * @return {Object[]} One {err, json, index, row} entry per row.
 */
function justReturnRows(lines, params, idx) {
  var entries = [];
  var total = lines.length;
  var pos = 0;
  while (pos < total) {
    var entry = {err: null, json: {}, index: idx++, row: lines[pos]};
    entries.push(entry);
    pos += 1;
  }
  return entries;
}
/**
 * Convert every row; rows for which processRow returns nothing are dropped,
 * but they still consume an index.
 *
 * @param {Array[]} csvRows   CSV rows.
 * @param {Object} params     Converter params.
 * @param {number} startIndex Index of the first row.
 * @return {Object[]} Result entries.
 */
function processRows(csvRows, params, startIndex) {
  var converted = [];
  var total = csvRows.length;
  for (var pos = 0; pos < total; pos++) {
    var outcome = processRow(csvRows[pos], params, startIndex++);
    if (!outcome) {
      continue;
    }
    converted.push(outcome);
  }
  return converted;
}
/**
 * Convert one CSV row.
 *
 * @param {Array} row     Row columns.
 * @param {Object} param  Converter params (parseRules and _headers populated).
 * @param {number} index  Row index.
 * @return {Object|null} {err} on a column-count mismatch (with checkColumn),
 *                       {json, index, row} on success, or null when the row yields no value.
 */
function processRow(row, param, index) {
  var rules = param.parseRules;
  var columnCountWrong = param.checkColumn && row.length !== rules.length;
  if (columnCountWrong) {
    return {
      err: CSVError.column_mismatched(index)
    };
  }
  var json = convertRowToJson(row, param._headers, param);
  if (!json) {
    return null;
  }
  return {
    json: json,
    index: index,
    row: row
  };
}
/**
 * Build the JSON object for one row.
 *
 * @param {Array} row       Row columns.
 * @param {Array} headRow   Header names; missing entries are filled in as "field<N>".
 * @param {Object} param    Converter params.
 * @return {Object|boolean} The result object, or false when no column contributed
 *                          (empty row, or all values skipped by ignoreEmpty).
 */
function convertRowToJson(row, headRow, param) {
var hasValue = false;
var resultRow = {};
for (var i = 0, len = row.length; i < len; i++) {
var convertFunc, head, item;
item = row[i];
if (param.ignoreEmpty && item === '') {
continue;
}
hasValue = true;
head = headRow[i];
// Columns without a header get a generated name, which is also stored back into headRow.
if (!head || head === "") {
head = headRow[i] = "field" + (i + 1);
}
// A column parser configured in colParser takes precedence over header flags and type checks.
var convFunc = getConvFunc(head, i, param);
if (convFunc) {
var convRes = convFunc(item, head, resultRow,row,i);
// An undefined result means the converter does not want a value stored.
if (convRes !== undefined) {
setPath(resultRow, head, convRes);
}
} else {
var flag = getFlag(head, i, param);
if (flag === 'omit') {
continue;
}
if (param.checkType) {
convertFunc = checkType(item, head, i, param);
item = convertFunc(item);
}
var title = getTitle(head, i, param);
// Flat columns are stored under the literal title; others may expand into a nested path.
if (flag === 'flat' || param.flatKeys) {
resultRow[title] = item;
} else {
setPath(resultRow, title, item);
}
}
}
if (hasValue) {
return resultRow;
} else {
return false;
}
}
// Converters selectable by name through colParser (see getConvFunc).
// "omit" returns undefined, so convertRowToJson stores nothing for that column.
var builtInConv={
"string":stringType,
"number":numberType,
"omit":function(){}
}
/**
 * Resolve the column parser configured in param.colParser for a header.
 * The result is cached per column index in param._columnConv.
 *
 * @param {string} head  Header name of the column.
 * @param {number} i     Column index (cache key).
 * @param {Object} param Converter params with `colParser` and `_columnConv`.
 * @return {Function|boolean} The converter function, or false when none applies.
 */
function getConvFunc(head,i,param){
  var cached = param._columnConv[i];
  if (cached !== undefined) {
    return cached;
  }
  var hasOwn = Object.prototype.hasOwnProperty;
  var resolved = false;
  // Only accept parsers the caller actually configured. A plain `colParser[head]` lookup
  // would also find inherited members, so a header named "constructor" or "toString"
  // would pick up an Object.prototype function as its converter.
  if (hasOwn.call(param.colParser, head)) {
    var flag = param.colParser[head];
    if (typeof flag === "function") {
      resolved = flag;
    } else if (typeof flag === "string") {
      var builtInName = flag.trim().toLowerCase();
      // Same own-property guard for built-in names, e.g. a flag of "constructor".
      if (hasOwn.call(builtInConv, builtInName) && builtInConv[builtInName]) {
        resolved = builtInConv[builtInName];
      }
    }
  }
  return param._columnConv[i] = resolved;
}
/**
 * Store a value on the result object. A path without dots is assigned directly;
 * a dotted path is handed to lodash `set`.
 *
 * @param {Object} json  Target object.
 * @param {string} path  Key or dotted path.
 * @param {*} value      Value to store.
 */
function setPath(json, path, value) {
  var _set = require('lodash/set');
  var isNested = path.split('.').length !== 1;
  if (isNested) {
    _set(json, path, value);
    return;
  }
  json[path] = value;
}
/**
 * Determine the flag of a header: 'omit', 'flat' or '' (omit wins when both are present).
 * The result is cached per column index in param._headerFlag.
 *
 * @param {string} head  Header name.
 * @param {number} i     Column index (cache key).
 * @param {Object} param Converter params with `_headerFlag`.
 * @return {string}
 */
function getFlag(head, i, param) {
  var cached = param._headerFlag[i];
  if (typeof cached === "string") {
    return cached;
  }
  var flag = '';
  if (head.indexOf('*omit*') > -1) {
    flag = 'omit';
  } else if (head.indexOf('*flat*') > -1) {
    flag = 'flat';
  }
  param._headerFlag[i] = flag;
  return flag;
}
/**
 * Return the output key for a header: the header with the first '*flat*',
 * 'string#!' and 'number#!' markers removed. Cached per column in param._headerTitle.
 *
 * @param {string} head  Header name.
 * @param {number} i     Column index (cache key).
 * @param {Object} param Converter params with `_headerTitle`.
 * @return {string}
 */
function getTitle(head, i, param) {
  var cached = param._headerTitle[i];
  if (cached) {
    return cached;
  }
  // The return value is not needed, but the call fills param._headerFlag[i].
  getFlag(head, i, param);
  var title = head;
  ['*flat*', 'string#!', 'number#!'].forEach(function (marker) {
    title = title.replace(marker, '');
  });
  param._headerTitle[i] = title;
  return title;
}
/**
 * Choose the type converter for a column: explicit 'number#!' / 'string#!' markers
 * in the header win; otherwise dynamic detection when param.checkType is set, else string.
 * The choice is cached per column in param._headerType.
 *
 * @param {string} item     Column value (not inspected here).
 * @param {string} head     Header name.
 * @param {number} headIdx  Column index (cache key).
 * @param {Object} param    Converter params with `_headerType` and `checkType`.
 * @return {Function} Converter taking the column value.
 */
function checkType(item, head, headIdx, param) {
  var known = param._headerType[headIdx];
  if (known) {
    return known;
  }
  var converter;
  if (head.indexOf('number#!') > -1) {
    converter = numberType;
  } else if (head.indexOf('string#!') > -1) {
    converter = stringType;
  } else {
    converter = param.checkType ? dynamicType : stringType;
  }
  param._headerType[headIdx] = converter;
  return converter;
}
/**
 * Convert a value with parseFloat; values that do not parse are returned unchanged.
 *
 * @param {string} item Column value.
 * @return {number|string}
 */
function numberType(item) {
  var parsed = parseFloat(item);
  return isNaN(parsed) ? item : parsed;
}
/**
 * Convert a value to its string form via toString().
 *
 * @param {*} item Column value.
 * @return {string}
 */
function stringType(item) {
  var text = item.toString();
  return text;
}
/**
 * Guess the type of a value from its trimmed text: number, boolean
 * ("true"/"false" in any case), JSON ({...} or [...]), otherwise string.
 *
 * @param {string} item Column value.
 * @return {*} The converted value.
 */
function dynamicType(item) {
  var text = item.trim();
  if (text === "") {
    return stringType(item);
  }
  if (numReg.test(text)) {
    return numberType(item);
  }
  var lowered = text.toLowerCase();
  if (lowered === "false" || lowered === "true") {
    return booleanType(item);
  }
  var first = text[0];
  var last = text[text.length - 1];
  var looksLikeJson = (first === "{" && last === "}") || (first === "[" && last === "]");
  if (looksLikeJson) {
    return jsonType(item);
  }
  return stringType(item);
}
/**
 * Convert a value to a boolean: false only for "false" (any case, surrounding
 * whitespace ignored), true for everything else.
 *
 * @param {string} item Column value.
 * @return {boolean}
 */
function booleanType(item) {
  var normalized = item.trim().toLowerCase();
  return normalized !== "false";
}
/**
 * Parse a value as JSON; text that is not valid JSON is returned unchanged.
 *
 * @param {string} item Column value.
 * @return {*} Parsed value, or the original text.
 */
function jsonType(item) {
  var parsed = item;
  try {
    parsed = JSON.parse(item);
  } catch (e) {
    // Not valid JSON: keep the raw text.
  }
  return parsed;
}

116
themes/keepit/node_modules/csvtojson/v1/core/parser.js generated vendored Normal file
View File

@@ -0,0 +1,116 @@
// Type names accepted in a "<type>#!<name>" header prefix.
var explicitTypes = ["number", "string"];

/**
 * Remove the parser's flag pattern from a header.
 * Without a pattern the header is returned untouched; String#replace(null, '')
 * would otherwise remove the literal text "null" from it.
 */
function stripHeadFlag(regExp, text) {
  return regExp ? text.replace(regExp, '') : text;
}

/**
 * A header parser: recognises headers by a pattern and stores column values on the result row.
 *
 * @param {string} [name]            Parser name; defaults to "Default".
 * @param {RegExp|string} [regExp]   Pattern identifying headers handled by this parser.
 * @param {Function} [parser]        Replacement for the default `parse` method.
 * @param {boolean} [processSafe]    Stored as `processSafe`.
 */
function Parser(name, regExp, parser, processSafe) {
  this.name = typeof name === "undefined" ? "Default" : name;
  this.regExp = null;
  this.type = "";
  this.processSafe = processSafe;
  if (typeof regExp !== "undefined") {
    if (typeof regExp === "string") {
      this.regExp = new RegExp(regExp);
    } else {
      this.regExp = regExp;
    }
  }
  if (typeof parser !== "undefined") {
    this.parse = parser;
  }
}
// var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/;

/**
 * Convert a column value according to the parser's explicit type, or by
 * inspection when no type is set and param.checkType is on.
 *
 * @param {string} item Column value.
 * @return {*} Converted value; for type "number" a value that does not parse becomes 0.
 */
Parser.prototype.convertType = function(item) {
  var type=this.type;
  if (type === 'number') {
    var rtn = parseFloat(item);
    if (isNaN(rtn)) {
      return 0;
    } else {
      return rtn;
    }
  } else if (this.param && this.param.checkType && type === '') {
    var trimed = item.trim();
    if (trimed === ""){
      return trimed;
    }
    // NOTE(review): global isNaN coerces its argument, so e.g. "0x10" passes this
    // test and parseFloat then yields 0 — confirm whether that is intended.
    if (!isNaN(trimed)) {
      return parseFloat(trimed);
    } else if (trimed.length === 5 && trimed.toLowerCase() === "false") {
      return false;
    } else if (trimed.length === 4 && trimed.toLowerCase() === "true") {
      return true;
    } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1]==="]") {
      try {
        return JSON.parse(trimed);
      } catch (e) {
        // Not valid JSON: keep the original text.
        return item;
      }
    } else {
      return item;
    }
  }
  return item;
};

/**
 * Attach the converter params (read by convertType).
 * @param {Object} param
 */
Parser.prototype.setParam = function(param) {
  this.param = param;
};

/**
 * Check whether a header matches this parser's pattern.
 * @param {string} str Header text.
 * @return {boolean|null} Falsy when the parser has no pattern.
 */
Parser.prototype.test = function(str) {
  return this.regExp && this.regExp.test(str);
};

/**
 * Default parse: store the item under the raw header.
 * @param {Object} params {resultRow, head, item}
 */
Parser.prototype.parse = function(params) {
  params.resultRow[params.head] = params.item;
};

/**
 * Return the header without the parser flag, computing and caching it on first use.
 * @return {string} "Unknown Header" when nothing is left after stripping.
 */
Parser.prototype.getHeadStr = function() {
  if (!this.headStr) {
    this.headStr = stripHeadFlag(this.regExp, this.head) || "Unknown Header";
  }
  return this.headStr;
};

/**
 * @return {string} The raw header given to initHead.
 */
Parser.prototype.getHead = function() {
  return this.head;
};

/**
 * Initialise the parser for a column title: strips the flag and splits off an
 * optional explicit "<type>#!" prefix (matched case-insensitively).
 *
 * @param {string} columnTitle Raw header text.
 */
Parser.prototype.initHead = function(columnTitle) {
  this.head = columnTitle;
  var wholeHead = stripHeadFlag(this.regExp, columnTitle);
  //init type && headStr
  var splitArr = wholeHead.split("#!");
  if (splitArr.length === 1) { //no explicit type
    this.headStr = splitArr[0];
  } else {
    var type = splitArr.shift().toLowerCase();
    if (explicitTypes.indexOf(type) > -1) {
      // Store the normalised name: convertType compares against lowercase 'number',
      // so "Number#!x" must not be stored as "Number".
      this.type = type;
      this.headStr = splitArr.join("#!");
    } else { //no explicit type
      this.headStr = wholeHead;
    }
  }
  if (!this.headStr) {
    this.headStr = wholeHead ? wholeHead : "Unknown Head";
  }
};

/**
 * Create a new Parser carrying every enumerable property of this one
 * (own fields as well as prototype methods).
 * @return {Parser}
 */
Parser.prototype.clone = function() {
  var newParser = new Parser();
  for (var key in this) {
    newParser[key] = this[key];
  }
  return newParser;
};

/**
 * @return {string} The parser name.
 */
Parser.prototype.getName = function() {
  return this.name;
};
module.exports = Parser;

View File

@@ -0,0 +1,69 @@
//implementation
var registeredParsers = [];
var Parser = require("./parser.js");
var defaultParser = require("./defaultParsers");
/**
 * Adds a parser to the registry unless it is not a Parser instance or this
 * exact object is already registered.
 * @param {Parser} parser
 */
function registerParser(parser) {
  if (!(parser instanceof Parser)) {
    return;
  }
  // TODO indexOf compares by identity, so two distinct Parser objects with the
  // same name are both kept.
  if (registeredParsers.indexOf(parser) !== -1) {
    return;
  }
  registeredParsers.push(parser);
}
/**
 * Builds a parser instance for a column title.
 * The last registered parser whose pattern matches the title is cloned; when
 * none matches, the parser registered under the name "json" is cloned (or a
 * default Parser is created if that name is not registered).
 * @param {string} columnTitle column header, falsy values are treated as ''
 * @param {Object} param converter parameters handed to the parser
 * @returns {Parser} initialised parser instance
 */
function getParser(columnTitle, param) {
  // Clone of the last registered parser with the given name, or a fresh Parser.
  function getParserByName(parserName) {
    var found;
    for (var i = 0, n = registeredParsers.length; i < n; i++) {
      if (registeredParsers[i].getName() === parserName) {
        found = registeredParsers[i];
      }
    }
    return found ? found.clone() : new Parser(); //TODO remove new
  }

  var title = columnTitle ? columnTitle : '';
  var matched;
  for (var j = 0, count = registeredParsers.length; j < count; j++) {
    var candidate = registeredParsers[j];
    if (candidate.test(title)) {
      matched = candidate; // later registrations win
    }
  }

  var inst;
  if (matched) {
    inst = matched.clone();
    inst.head = title;
  } else {
    inst = getParserByName("json");
  }
  inst.setParam(param);
  inst.initHead(title);
  return inst;
}
/**
 * Creates a Parser (processSafe is always false) and registers it.
 * @param {string} name parser name
 * @param {RegExp|string} regExp header marker pattern
 * @param {Function} parseFunc parse implementation
 */
function addParser(name, regExp, parseFunc) {
  registerParser(new Parser(name, regExp, parseFunc, false)); //TODO remove new
}
/**
 * Placeholder that currently does nothing and is not exported by this module.
 * @param {*} parserPath unused
 */
function addSafeParser(parserPath) {
//TODO impl
}
/**
 * Creates one parser per column title of a header row.
 * @param {string[]} row column titles
 * @param {Object} param converter parameters handed to every parser
 * @returns {Parser[]} parsers in column order
 */
function initParsers(row, param) {
  return row.reduce(function (parsers, columnTitle) {
    parsers.push(getParser(columnTitle, param));
    return parsers;
  }, []);
}
// Register every built-in parser configuration when this module is loaded.
// NOTE: addParser declares only three parameters and always passes `false` as
// processSafe to the Parser constructor, so parserCfg.processSafe is ignored here.
defaultParser.forEach(function (parserCfg){
//TODO refactor this
addParser(parserCfg.name, parserCfg.regExp, parserCfg.parserFunc, parserCfg.processSafe);
});
//module interfaces
module.exports.addParser = addParser;
module.exports.initParsers = initParsers;
module.exports.getParser = getParser;

View File

@@ -0,0 +1,131 @@
var getDelimiter = require("./getDelimiter");
var filterRow=require("./filterRow");
/**
* Convert a line of string to csv columns according to its delimiter.
* The param._header may not be ready when this is called.
*
* Side effect: when param.delimiter is an Array or the string "auto", the
* delimiter is resolved with getDelimiter() and written back to param.delimiter.
*
* @param {string} rowStr one CSV line (may be the start of a multi-line quoted row)
* @param {Object} param Converter param; reads quote, trim, escape, delimiter and _needFilterRow
* @return {{cols: string[], closed: boolean}} e.g. {cols:["a","b","c"],closed:true};
* closed is false when a quoted cell is still open at the end of rowStr, in which
* case the buffered text of that open cell is NOT included in cols.
*/
module.exports = function rowSplit(rowStr, param) {
// An empty line is a complete row without columns.
if (rowStr === "") {
return { cols: [], closed: true };
}
var quote = param.quote;
var trim = param.trim;
var escape = param.escape;
if (param.delimiter instanceof Array || param.delimiter.toLowerCase() === "auto") {
param.delimiter = getDelimiter(rowStr, param);
}
var delimiter = param.delimiter;
// Naive split first; pieces belonging to one quoted cell are re-joined below.
var rowArr = rowStr.split(delimiter);
// With quoting disabled the naive split is already the result.
if (quote === "off") {
return { cols: rowArr, closed: true };
}
var row = [];
// inquote: currently inside a quoted cell that spans several split pieces.
var inquote = false;
// quoteBuff: text collected so far for the open quoted cell.
var quoteBuff = '';
for (var i = 0, rowLen = rowArr.length; i < rowLen; i++) {
var e = rowArr[i];
// Pieces inside an open quote are kept verbatim, so only trim outside of one.
if (!inquote && trim) {
e = e.trim();
}
var len = e.length;
if (!inquote) {
if (isQuoteOpen(e, param)) { //quote open
// Drop the opening quote character.
e = e.substr(1);
if (isQuoteClose(e, param)) { //quote close
// Cell opens and closes within this piece: drop the closing quote and unescape.
e = e.substring(0, e.length - 1);
e = _escapeQuote(e, quote, escape);
row.push(e);
continue;
} else {
inquote = true;
quoteBuff += e;
continue;
}
} else {
row.push(e);
continue;
}
} else { //previous quote not closed
if (isQuoteClose(e, param)) { //close double quote
inquote = false;
// Drop the closing quote, then re-insert the delimiter removed by split().
e = e.substr(0, len - 1);
quoteBuff += delimiter + e;
quoteBuff = _escapeQuote(quoteBuff, quote, escape);
if (trim) {
// Only trailing whitespace is removed from a multi-piece quoted cell.
quoteBuff = quoteBuff.replace(/\s+$/, "");
}
row.push(quoteBuff);
quoteBuff = "";
} else {
quoteBuff += delimiter + e;
}
}
}
// Column filtering is applied to complete rows only.
if (!inquote && param._needFilterRow) {
row = filterRow(row, param);
}
return { cols: row, closed: !inquote };
// if (param.workerNum<=1){
// }else{
// if (inquote && quoteBuff.length>0){//for multi core, quote will be closed at the end of line
// quoteBuff=_escapeQuote(quoteBuff,quote,escape);;
// if (trim){
// quoteBuff=quoteBuff.trimRight();
// }
// row.push(quoteBuff);
// }
// return {cols:row,closed:true};
// }
};
/**
 * Tells whether a split piece starts a quoted cell.
 * The piece must start with the quote character; if the second character is
 * also the quote character it only counts as an opening quote when the quote
 * character doubles as the escape character and the piece is exactly two
 * characters long or its third character is a quote as well.
 * @param {string} str split piece
 * @param {{quote: string, escape: string}} param
 * @returns {boolean}
 */
function isQuoteOpen(str, param) {
  var quoteChar = param.quote;
  var escapeChar = param.escape;
  if (str[0] !== quoteChar) {
    return false;
  }
  var second = str[1];
  if (second !== quoteChar) {
    return true;
  }
  if (second !== escapeChar) {
    return false;
  }
  return str[2] === quoteChar || str.length === 2;
}
/**
 * Tells whether a split piece ends a quoted cell: the run of quote/escape
 * characters at the end of the piece must have an odd length.
 * @param {string} str split piece
 * @param {{quote: string, escape: string}} param
 * @returns {boolean}
 */
function isQuoteClose(str, param) {
  var quoteChar = param.quote;
  var escapeChar = param.escape;
  var trailing = 0;
  for (var pos = str.length - 1; str[pos] === quoteChar || str[pos] === escapeChar; pos--) {
    trailing++;
  }
  return trailing % 2 === 1;
}
/**
 * Collapses every doubled quote character into a single one, scanning left to
 * right without re-examining the character that was kept.
 * @param {string} str text to process
 * @param {string} quote quote character
 * @returns {string}
 */
function twoDoubleQuote(str, quote) {
  var pair = quote + quote;
  var pos = str.indexOf(pair, -1);
  while (pos > -1) {
    // Remove the first character of the pair; the second one stays at `pos`.
    str = str.substring(0, pos) + str.substring(pos + 1);
    pos = str.indexOf(pair, pos + 1);
  }
  return str;
}
// Compiled "escape + quote" patterns, keyed by the quote/escape pair.
var cachedRegExp = {};

/**
 * Replaces every escaped quote (escape character followed by quote character)
 * in a segment with a plain quote character.
 * @param {string} segment cell text
 * @param {string} quote quote character
 * @param {string} escape escape character
 * @returns {string}
 */
function _escapeQuote(segment, quote, escape) {
  var cacheKey = "es|" + quote + "|" + escape;
  var pattern = cachedRegExp[cacheKey];
  if (pattern === undefined) {
    // Both characters are backslash-prefixed so they are matched literally.
    pattern = new RegExp('\\' + escape + '\\' + quote, 'g');
    cachedRegExp[cacheKey] = pattern;
  }
  return segment.replace(pattern, quote);
}

84
themes/keepit/node_modules/csvtojson/v1/core/worker.js generated vendored Normal file
View File

@@ -0,0 +1,84 @@
var param = null;
var fileLine = require("./fileline");
var csvline = require("./csvline");
var linesToJson = require("./linesToJson");
var CSVError = require('./CSVError');
var eom = "\x03";
var eom1 = "\x0e";
var eom2 = "\x0f";
/**
* message is like :
* 0{"a":"b"}
* 13345|a,b,c
* <cmd><data>
* <cmd> is 0-9
*/
// Holds the trailing, not yet terminated part of the last stdin chunk.
var buffer = "";
// Let the stream decode UTF-8: decoding each Buffer chunk separately with
// toString("utf8") corrupts multi-byte characters that straddle a chunk boundary.
process.stdin.setEncoding("utf8");
process.stdin.on("data", function(chunk) {
  // Messages are terminated by eom; everything after the last eom is incomplete.
  var cmdArr = (buffer + chunk).split(eom);
  while (cmdArr.length > 1) {
    processMsg(cmdArr.shift());
  }
  buffer = cmdArr[0];
});
// Messages arriving on the IPC channel are handled by the same dispatcher.
process.on("message", processMsg);
/**
 * Dispatches one protocol message of the form <cmd><data>.
 * "0" initialises the parameters, "1" processes CSV data; anything else is
 * reported on stderr. Falsy messages are ignored.
 * @param {string} msg
 */
function processMsg(msg) {
  if (!msg) {
    return;
  }
  var cmd = msg[0];
  var data = msg.substr(1);
  if (cmd === "0") {
    initParams(data);
  } else if (cmd === "1") {
    processData(data);
  } else {
    console.error("Unknown command: " + msg);
  }
}
/**
 * Stores the converter parameters sent by the parent process.
 * @param {string} data JSON text of the parameters
 */
function initParams(data) {
  var parsed = JSON.parse(data);
  param = parsed;
}
/**
 * Parses one chunk of raw CSV and sends the result back to the parent.
 * Input:  <start line number>|<raw csv data>, e.g. 1023|a,b,c,d\ne,f,g,h\n
 * Output: "1" followed by one record per row, each record being
 *         <index> eom2 <row JSON> eom2 <error> eom2 <json JSON> eom1
 * Exits the process with code 1 when the parameters were never initialised.
 * @param {string} data
 */
function processData(data) {
  if (!param) {
    console.error("Parameter not initialised when processing data.");
    process.exit(1);
  }
  var sepIdx = data.indexOf("|");
  var startIdx = parseInt(data.substr(0, sepIdx));
  var csvData = data.substr(sepIdx + 1);

  var fileLines = fileLine(csvData, param); // raw text -> file lines
  var csvLines = csvline(fileLines.lines, param);
  var res = linesToJson(csvLines.lines, param, startIdx);

  // Leftover text means a quoted cell was never closed within this chunk.
  if (csvLines.partial) {
    var lastIdx = res.length > 0 ? res[res.length - 1].index + 1 : startIdx;
    res.push({
      err: CSVError.unclosed_quote(lastIdx, csvLines.partial)
    });
  }

  var records = res.map(function(item) {
    var errStr = item.err ? item.err.toString() : "";
    return item.index + eom2 + JSON.stringify(item.row) + eom2 + errStr + eom2 + JSON.stringify(item.json) + eom1;
  });
  sendData("1" + records.join(""));
}
/**
 * Writes one message to stdout, terminated by the end-of-message marker.
 * @param {string} str message text
 */
function sendData(str) {
  var framed = str + eom;
  process.stdout.write(framed);
}

View File

@@ -0,0 +1,150 @@
module.exports = workerMgr;
var eom = "\x03";
var eom1 = "\x0e";
var eom2 = "\x0f";
var CSVError = require('./CSVError');
/**
 * Creates a manager for a pool of child-process workers.
 * The returned object exposes initWorker, sendWorker, setParams, isRunning,
 * destroyWorker and a `drain` hook (a no-op by default) that is invoked when
 * the last running job finishes and nothing is waiting.
 * @return {Object} the manager interface
 */
function workerMgr() {
var exports = {
initWorker: initWorker,
sendWorker: sendWorker,
setParams: setParams,
drain: function(){},
isRunning: isRunning,
destroyWorker: destroyWorker
};
// Idle workers; a worker is removed from this list while it processes a job.
var workers = [];
// Number of jobs currently being processed.
var running = 0;
// Arguments of the single sendWorker call that could not be dispatched yet.
var waiting = null;
// Resets the manager state and creates `num` workers initialised with params.
function initWorker(num, params) {
workers = [];
running = 0;
waiting = null;
for (var i = 0; i < num; i++) {
workers.push(new Worker(params));
}
}
function isRunning() {
return running > 0;
}
// Destroys the workers that are in the idle list at the time of the call.
function destroyWorker() {
workers.forEach(function(w) {
w.destroy();
});
}
// Dispatches a chunk to an idle worker, or parks the call when none is idle.
// transformCb is called on the next tick after a successful dispatch;
// cbResult receives (result, startIdx) once the worker answers.
function sendWorker(data, startIdx, transformCb, cbResult) {
if (workers.length > 0) {
var worker = workers.shift();
running++;
worker.parse(data, startIdx, function(result) {
// var arr=JSON.parse(result);
// arr.forEach(function(item){
// console.log('idx',item.index)
// })
// Return the worker to the idle list before reporting the result.
workers.push(worker);
cbResult(result, startIdx);
running--;
if (waiting === null && running === 0) {
exports.drain();
} else if (waiting) {
// Re-dispatch the parked call now that a worker is idle again.
sendWorker.apply(this, waiting);
waiting = null;
}
});
process.nextTick(transformCb);
} else {
// Only one call can be parked; a later call replaces an earlier parked one.
waiting = [data, startIdx, transformCb, cbResult];
}
}
// Forwards new parameters to the workers currently in the idle list.
function setParams(params) {
workers.forEach(function(w) {
w.setParams(params);
});
}
return exports;
}
/**
 * Wraps one child process running worker.js.
 * Messages in both directions are strings of the form <cmd><data> terminated
 * by the eom marker and exchanged over the child's stdin/stdout.
 * @param {Object} params converter parameters sent to the child immediately
 */
function Worker(params) {
  var spawn = require("child_process").spawn;
  this.cp = spawn(process.execPath, [__dirname + "/worker.js"], {
    env: {
      child: true
    },
    stdio: ['pipe', 'pipe', 2, 'ipc']
  });
  this.setParams(params);
  this.cp.on("message", this.onChildMsg.bind(this));
  // Trailing, not yet terminated part of the last stdout chunk.
  this.buffer = "";
  var self = this;
  // Let the stream decode UTF-8: decoding each Buffer chunk separately with
  // toString("utf8") corrupts multi-byte characters that straddle a chunk boundary.
  this.cp.stdout.setEncoding("utf8");
  this.cp.stdout.on("data", function(chunk) {
    var cmdArr = (self.buffer + chunk).split(eom);
    while (cmdArr.length > 1) {
      self.onChildMsg(cmdArr.shift());
    }
    self.buffer = cmdArr[0];
  });
}

/**
 * Sends the converter parameters to the child (command "0").
 * @param {Object} params serialised with JSON.stringify
 */
Worker.prototype.setParams = function(params) {
  var msg = "0" + JSON.stringify(params);
  this.sendMsg(msg);
};

/**
 * Handles one message from the child.
 * msg is like <cmd><data>, cmd is from 0-9:
 *  "0": <line count>|<partial> is passed to this.cbLine, if set
 *  "1": records separated by eom1, fields separated by eom2, passed to
 *       this.cbResult as [{index, row, err, json}], if set; index, row and json
 *       are passed on as the received strings, err is rebuilt as a CSVError or null
 * Other commands and falsy messages are ignored.
 * @param {string} msg
 */
Worker.prototype.onChildMsg = function(msg) {
  if (!msg) {
    return;
  }
  var cmd = msg[0];
  var data = msg.substr(1);
  switch (cmd) {
    case "0": // total line number of current chunk
      if (this.cbLine) {
        var sp = data.split("|");
        var len = parseInt(sp[0], 10);
        var partial = sp[1];
        this.cbLine(len, partial);
      }
      break;
    case "1": // json array of current chunk
      if (this.cbResult) {
        var rows = data.split(eom1);
        rows.pop(); // every record ends with eom1, so the last piece is empty
        var res = rows.map(function(row) {
          var fields = row.split(eom2);
          return {
            index: fields[0],
            row: fields[1],
            err: fields[2] ? CSVError.fromArray(JSON.parse(fields[2])) : null,
            json: fields[3]
          };
        });
        this.cbResult(res);
      }
      break;
  }
};

/**
 * Asks the child to parse a chunk of CSV (command "1").
 * @param {string} data raw CSV text
 * @param {number} startIdx index of the first row in the chunk
 * @param {Function} cbResult called with the parsed records; replaces any previous callback
 */
Worker.prototype.parse = function(data, startIdx, cbResult) {
  this.cbResult = cbResult;
  var msg = "1" + startIdx + "|" + data;
  this.sendMsg(msg);
};

/**
 * Kills the child process.
 */
Worker.prototype.destroy = function() {
  this.cp.kill();
};

/**
 * Writes one eom-terminated message to the child's stdin.
 * @param {string} msg
 */
Worker.prototype.sendMsg = function(msg) {
  this.cp.stdin.write(msg + eom, "utf8");
};

6
themes/keepit/node_modules/csvtojson/v1/index.js generated vendored Normal file
View File

@@ -0,0 +1,6 @@
//live apis
module.exports = require("./core");
//deprecated but leave it for backward compatibility
// Must be assigned AFTER module.exports is replaced above; setting it on the
// initial exports object first would be discarded by that replacement.
module.exports.core = require("./core");
module.exports.interfaces = require("./interfaces");

View File

@@ -0,0 +1 @@
module.exports = require("./main.js");

View File

@@ -0,0 +1,42 @@
/**
* Convert input to process stdout
*/
//implementation
var Converter = require("../../core/Converter.js");
/**
 * Creates a Converter that streams its records to stdout as a JSON array.
 * The opening bracket is written immediately, records are separated by ",\n",
 * the closing bracket is written on "end_parsed". On "error" the error is
 * printed to stderr and the process exits with code -1.
 * @returns {Converter}
 */
function _initConverter() {
  var converter = new Converter();
  var wroteRecord = false;
  var out = process.stdout;

  converter.on("record_parsed", function(rowJSON) {
    if (wroteRecord) {
      out.write(",\n");
    }
    out.write(JSON.stringify(rowJSON)); // write parsed JSON object one by one
    wroteRecord = true;
  });

  out.write("[\n"); // write array symbol

  converter.on("end_parsed", function() {
    out.write("\n]"); // end array symbol
  });

  converter.on("error", function(err) {
    console.error(err);
    process.exit(-1);
  });

  return converter;
}
/**
 * Converts the given input through a stdout-writing converter.
 * @param {string} fileName value handed to the converter's from()
 */
function convertFile(fileName) {
  _initConverter().from(fileName);
}
/**
 * Converts the given input through a stdout-writing converter.
 * @param {string} csvString value handed to the converter's from()
 */
function convertString(csvString) {
  _initConverter().from(csvString);
}
//module interfaces
module.exports.convertFile = convertFile;
module.exports.convertString = convertString;

View File

@@ -0,0 +1,2 @@
module.exports.web=require("./web");
module.exports.cli=require("./cli");

View File

@@ -0,0 +1 @@
module.exports = require("./webServer.js");

View File

@@ -0,0 +1,23 @@
var http = require("http");
var Converter = require("../../core/Converter.js");
/**
 * Starts an HTTP server that pipes POSTed CSV through a Converter and back
 * into the response. Any other request gets a short usage hint.
 * @param {{port: (string|number), urlpath: string}} [args]
 *        port defaults to '8801', urlpath defaults to '/parseCSV'
 * @returns {http.Server} the listening server
 */
function startWebServer(args) {
  var opts = args || {};
  var port = opts.port || '8801';
  var urlpath = opts.urlpath || '/parseCSV';

  var server = http.createServer();
  server.on("request", function(req, res) {
    var isParseRequest = req.url === urlpath && req.method === "POST";
    if (!isParseRequest) {
      res.end("Please post data to: " + urlpath);
      return;
    }
    req.pipe(new Converter({constructResult:false})).pipe(res);
  });
  server.listen(port);

  console.log("CSV Web Server Listen On:" + port);
  console.log("POST to " + urlpath + " with CSV data to get parsed.");
  return server;
}
module.exports.startWebServer = startWebServer;