1 | ;
|
2 |
|
3 | let path = require('doc-path'),
|
4 | constants = require('./constants.json'),
|
5 | utils = require('./utils');
|
6 |
|
7 | const Csv2Json = function(options) {
|
8 | const escapedWrapDelimiterRegex = new RegExp(options.delimiter.wrap + options.delimiter.wrap, 'g'),
|
9 | excelBOMRegex = new RegExp('^' + constants.values.excelBOM);
|
10 |
|
/**
 * Normalizes a raw header cell into the key used for the generated documents.
 * The outermost wrap delimiters are always removed; when the
 * `trimHeaderFields` option is set, each dot-separated component of the key
 * is trimmed individually so nested paths such as ` a . b ` become `a.b`.
 * @param headerKey {String} raw header value taken from the first CSV line
 * @returns {String} the processed header key
 */
function processHeaderKey(headerKey) {
    const unwrappedKey = removeWrapDelimitersFromValue(headerKey);

    // Nothing else to do unless the user asked for trimmed header fields
    if (!options.trimHeaderFields) {
        return unwrappedKey;
    }

    const trimmedComponents = [];
    for (const component of unwrappedKey.split('.')) {
        trimmedComponents.push(component.trim());
    }
    return trimmedComponents.join('.');
}
|
25 |
|
/**
 * Builds the params object for the conversion pipeline and derives the header
 * fields from the first CSV line.
 * Each header field records the processed key and the column index it was
 * found at, so that record values can later be looked up by position.
 * @param lines {Array.<String[]>} csv lines, each already split into field values
 * @returns {Object} {lines: Array.<String[]>, headerFields: Array.<{value: String, index: Number}>}
 */
function retrieveHeading(lines) {
    const params = {lines};
    // An empty CSV produces no lines at all; treat that as an empty header row
    // instead of failing on `undefined.map`
    const headerRow = lines[0] || [];

    params.headerFields = headerRow.map((headerKey, index) => ({
        value: processHeaderKey(headerKey),
        index
    }));

    // If the user provided keys, keep only the matching header fields so that
    // the original column index of each requested key is preserved
    if (options.keys) {
        params.headerFields = params.headerFields.filter((headerField) => options.keys.includes(headerField.value));
    }

    return params;
}
|
47 |
|
/**
 * Promise-returning entry point for the line splitting step of the pipeline.
 * Delegates the actual work to splitLines and resolves with its result.
 * @param csv {String} the CSV text
 * @returns {Promise.<Array.<String[]>>} resolves with the split lines
 */
function splitCsvLines(csv) {
    const lines = splitLines(csv);
    return Promise.resolve(lines);
}
|
56 |
|
/**
 * Strips the Excel byte order mark from the CSV when the `excelBOM` option is
 * enabled; otherwise the CSV is passed through untouched.
 * @param csv {String} the CSV text
 * @returns {Promise.<String>} resolves with the (possibly stripped) CSV
 */
function stripExcelBOM(csv) {
    const cleanedCsv = options.excelBOM ? csv.replace(excelBOMRegex, '') : csv;
    return Promise.resolve(cleanedCsv);
}
|
68 |
|
/**
 * Splits the whole CSV string into lines and each line into its raw field
 * values in a single character-by-character pass.
 * Field and EOL delimiters that occur inside a wrapped field are kept as part
 * of that field's value. Wrap delimiters are left on the returned values.
 * @param csv {String} the CSV text to split
 * @returns {Array.<String[]>} one array of raw field values per CSV line
 */
function splitLines(csv) {
    const lines = [],
        lastCharacterIndex = csv.length - 1,
        eolDelimiterLength = options.delimiter.eol.length,
        stateVariables = {
            insideWrapDelimiter: false,
            parsingValue: true,
            justParsedDoubleQuote: false,
            startIndex: 0
        };
    let splitLine = [],
        character,
        charBefore,
        charAfter,
        nextNChar,
        index = 0;

    // Loop through each character in the csv to identify where to split the values
    while (index < csv.length) {
        // Current character
        character = csv[index];
        // Previous character
        charBefore = index ? csv[index - 1] : '';
        // Next character
        charAfter = index < lastCharacterIndex ? csv[index + 1] : '';
        // Next n characters, including the current character, where n = length(EOL delimiter)
        // This allows for the checking of an EOL delimiter when it is more than a single character (eg. '\r\n')
        nextNChar = utils.getNCharacters(csv, index, eolDelimiterLength);

        if ((nextNChar === options.delimiter.eol && !stateVariables.insideWrapDelimiter ||
            index === lastCharacterIndex) && charBefore === options.delimiter.field) {
            // If we reached an EOL delimiter or the end of the csv and the previous character is a field delimiter...

            let parsedValue;
            if (nextNChar === options.delimiter.eol && stateVariables.startIndex === index) {
                // The value starts at the EOL delimiter itself, so it is empty
                parsedValue = '';
            } else if (character === options.delimiter.field) {
                // The csv ends on a field delimiter: the value stops before that delimiter
                // rather than containing it (eg. 'a,,' must not yield ',' as a value)
                parsedValue = csv.substring(stateVariables.startIndex, index);
            } else {
                // Otherwise, there's a valid value, grab everything from the start index onwards
                parsedValue = csv.substring(stateVariables.startIndex);
            }

            // Push the value for the field that we were parsing
            splitLine.push(parsedValue);

            // Since the last character is a comma, there's still an additional implied field value trailing the comma.
            // Since this value is empty, we push an extra empty value
            splitLine.push('');

            // Finally, push the split line values into the lines array and clear the split line
            lines.push(splitLine);
            splitLine = [];
            stateVariables.startIndex = index + eolDelimiterLength;
            stateVariables.parsingValue = true;
            stateVariables.insideWrapDelimiter = charAfter === options.delimiter.wrap;
        } else if (index === lastCharacterIndex || nextNChar === options.delimiter.eol &&
            // if we aren't inside wrap delimiters or if we are but the character before was a wrap delimiter and we didn't just see two
            (!stateVariables.insideWrapDelimiter ||
                stateVariables.insideWrapDelimiter && charBefore === options.delimiter.wrap && !stateVariables.justParsedDoubleQuote)) {
            // Otherwise if we reached the end of the line or csv (and current character is not a field delimiter)

            let toIndex = index !== lastCharacterIndex || charBefore === options.delimiter.wrap ? index : undefined;

            // Retrieve the remaining value and add it to the split line list of values
            splitLine.push(csv.substring(stateVariables.startIndex, toIndex));

            // Finally, push the split line values into the lines array and clear the split line
            lines.push(splitLine);
            splitLine = [];
            stateVariables.startIndex = index + eolDelimiterLength;
            stateVariables.parsingValue = true;
            stateVariables.insideWrapDelimiter = charAfter === options.delimiter.wrap;
        } else if ((charBefore !== options.delimiter.wrap || stateVariables.justParsedDoubleQuote && charBefore === options.delimiter.wrap) &&
            character === options.delimiter.wrap && utils.getNCharacters(csv, index + 1, eolDelimiterLength) === options.delimiter.eol) {
            // If we reach a wrap which is not preceded by a wrap delim and the next character is an EOL delim (ie. *"\n)

            stateVariables.insideWrapDelimiter = false;
            stateVariables.parsingValue = false;
            // Next iteration will substring, add the value to the line, and push the line onto the array of lines
        } else if (character === options.delimiter.wrap && (index === 0 ||
            utils.getNCharacters(csv, index - eolDelimiterLength, eolDelimiterLength) === options.delimiter.eol)) {
            // If the line starts with a wrap delimiter (ie. "*)
            // The look-behind covers the full EOL delimiter so that multi-character
            // delimiters such as '\r\n' are recognised as well

            stateVariables.insideWrapDelimiter = true;
            stateVariables.parsingValue = true;
            stateVariables.startIndex = index;
        } else if (character === options.delimiter.wrap && charAfter === options.delimiter.field) {
            // If we reached a wrap delimiter with a field delimiter after it (ie. *",)

            splitLine.push(csv.substring(stateVariables.startIndex, index + 1));
            stateVariables.startIndex = index + 2; // next value starts after the field delimiter
            stateVariables.insideWrapDelimiter = false;
            stateVariables.parsingValue = false;
        } else if (character === options.delimiter.wrap && charBefore === options.delimiter.field &&
            !stateVariables.insideWrapDelimiter && !stateVariables.parsingValue) {
            // If we reached a wrap delimiter after a comma and we aren't inside a wrap delimiter

            stateVariables.startIndex = index;
            stateVariables.insideWrapDelimiter = true;
            stateVariables.parsingValue = true;
        } else if (character === options.delimiter.wrap && charBefore === options.delimiter.field &&
            !stateVariables.insideWrapDelimiter && stateVariables.parsingValue) {
            // If we reached a wrap delimiter with a field delimiter before it (ie. ,"*)

            splitLine.push(csv.substring(stateVariables.startIndex, index - 1));
            stateVariables.insideWrapDelimiter = true;
            stateVariables.parsingValue = true;
            stateVariables.startIndex = index;
        } else if (character === options.delimiter.wrap && charAfter === options.delimiter.wrap) {
            // If we run into an escaped quote (ie. "") skip past the second quote

            index += 2;
            stateVariables.justParsedDoubleQuote = true;
            continue;
        } else if (character === options.delimiter.field && charBefore !== options.delimiter.wrap &&
            charAfter !== options.delimiter.wrap && !stateVariables.insideWrapDelimiter &&
            stateVariables.parsingValue) {
            // If we reached a field delimiter and are not inside the wrap delimiters (ie. *,*)

            splitLine.push(csv.substring(stateVariables.startIndex, index));
            stateVariables.startIndex = index + 1;
        } else if (character === options.delimiter.field && charBefore === options.delimiter.wrap &&
            charAfter !== options.delimiter.wrap && !stateVariables.parsingValue) {
            // If we reached a field delimiter, the previous character was a wrap delimiter, and the
            // next character is not a wrap delimiter (ie. ",*)

            stateVariables.insideWrapDelimiter = false;
            stateVariables.parsingValue = true;
            stateVariables.startIndex = index + 1;
        }
        // Otherwise increment to the next character
        index++;
        // Reset the double quote state variable
        stateVariables.justParsedDoubleQuote = false;
    }

    return lines;
}
|
209 |
|
/**
 * Separates the record lines from the header line.
 * Note that `splice` removes the record lines from `params.lines` in place,
 * so afterwards `params.lines` only holds the header line.
 * @param params {Object} pipeline state containing `lines`
 * @returns {Object} the same params object with `recordLines` set
 */
function retrieveRecordLines(params) {
    // Everything after the first (header) line is a record
    return Object.assign(params, {recordLines: params.lines.splice(1)});
}
|
220 |
|
/**
 * Looks up the raw value for a header field in a record line and converts it.
 * When the line has no entry at the field's index the raw value is
 * `undefined`, which is handed to processRecordValue unchanged.
 * @param line {String[]} split line values for the record
 * @param key {Object} {index: Number, value: String}
 * @returns {*} the converted record value
 */
function retrieveRecordValueFromLine(line, key) {
    const {index} = key;

    // Perform any necessary value conversions on the raw value
    return processRecordValue(line[index]);
}
|
233 |
|
/**
 * Converts a raw field value into the value placed in the JSON document.
 * Whatever parseValue returns is used unless it is an error; this is what
 * turns values like 'null' into null and 'false' into false. If parsing
 * failed, the literal text 'undefined' becomes undefined and anything else is
 * returned unchanged.
 * @param fieldValue {String} raw field value
 * @returns {*} the converted value
 */
function processRecordValue(fieldValue) {
    const parsed = parseValue(fieldValue);

    // Anything other than an error is a successfully interpreted value
    if (!utils.isError(parsed)) {
        return parsed;
    }

    return fieldValue === 'undefined' ? undefined : fieldValue;
}
|
253 |
|
/**
 * Trims the record value, if specified by the user via the options object.
 * Only strings are trimmed. Every other value (null, numbers, booleans,
 * objects, ...) is returned untouched, which keeps this safe to use as a
 * mapper over the elements of a parsed JSON array.
 * @param fieldValue {*} value to trim
 * @returns {*} the trimmed string, or the original value when it is not a string
 */
function trimRecordValue(fieldValue) {
    if (options.trimFieldValues && typeof fieldValue === 'string') {
        return fieldValue.trim();
    }
    return fieldValue;
}
|
265 |
|
/**
 * Builds the JSON document for a single record line.
 * For every header field the value found at that field's column index is
 * converted and written into the document at the field's key via
 * `path.setPath`.
 * @param keys {Object[]} header fields, each {index: Number, value: String}
 * @param line {String[]} split line values for the record
 * @returns {Object} created json document
 */
function createDocument(keys, line) {
    let document = {};

    keys.forEach((key) => {
        const value = retrieveRecordValueFromLine(line, key);

        // setPath's return value becomes the document for the next key
        document = path.setPath(document, key.value, value);
    });

    return document;
}
|
283 |
|
/**
 * Strips the outermost pair of wrap delimiters from a value.
 * A value that does not both start and end with the wrap delimiter is
 * returned unchanged.
 * @param fieldValue {String} raw field value
 * @returns {String} the value without its surrounding wrap delimiters
 */
function removeWrapDelimitersFromValue(fieldValue) {
    const wrapDelimiter = options.delimiter.wrap,
        lastIndex = fieldValue.length - 1,
        isWrapped = fieldValue[0] === wrapDelimiter && fieldValue[lastIndex] === wrapDelimiter;

    // Keep everything strictly between the first and the last character
    return isWrapped ? fieldValue.slice(1, lastIndex) : fieldValue;
}
|
/**
 * Unescapes wrap delimiters by replacing duplicates with a single (eg. "" -> ")
 * This is done in order to parse RFC 4180 compliant CSV back to JSON.
 * The doubled delimiter is matched literally rather than through a regular
 * expression, so wrap delimiters that are regex metacharacters (such as `|`
 * or `.`) are handled correctly.
 * @param fieldValue {String} field value that may contain escaped wrap delimiters
 * @returns {String} the field value with each doubled wrap delimiter collapsed
 */
function unescapeWrapDelimiterInField(fieldValue) {
    const wrapDelimiter = options.delimiter.wrap;

    // split/join replaces every non-overlapping occurrence from left to right
    return fieldValue.split(wrapDelimiter + wrapDelimiter).join(wrapDelimiter);
}
|
310 |
|
/**
 * Main helper function to convert the record lines to the JSON document array.
 * Every field value of a record has its outer wrap delimiters removed, its
 * escaped wrap delimiters unescaped and is (optionally) trimmed before the
 * document for that record is created from the header fields.
 * @param params {Object} pipeline state containing `recordLines` and `headerFields`
 * @returns {Object} the same params object with `json` set to the array of documents
 */
function transformRecordLines(params) {
    // One document per record line; map avoids re-copying the accumulated
    // array for every record
    params.json = params.recordLines.map((line) => {
        const processedLine = line.map((fieldValue) => {
            // Perform the necessary operations on each field value
            const unwrapped = removeWrapDelimitersFromValue(fieldValue);
            const unescaped = unescapeWrapDelimiterInField(unwrapped);

            return trimRecordValue(unescaped);
        });

        return createDocument(params.headerFields, processedLine);
    });

    return params;
}
|
333 |
|
/**
 * Attempts to interpret a raw field value.
 * A value that `utils.isStringRepresentation` accepts and
 * `utils.isDateRepresentation` rejects is returned as is. Everything else is
 * run through JSON.parse; parsed arrays additionally have their elements
 * passed through trimRecordValue.
 * Any exception raised along the way is returned (not thrown), so callers
 * have to check the result for an error.
 * @param value {String} raw field value
 * @returns {*} the interpreted value, or the Error describing why it could not be parsed
 */
function parseValue(value) {
    try {
        const isPlainString = utils.isStringRepresentation(value, options) && !utils.isDateRepresentation(value);
        if (isPlainString) {
            return value;
        }

        const parsed = JSON.parse(value);

        // Array elements also need to be trimmed, if specified
        return Array.isArray(parsed) ? parsed.map(trimRecordValue) : parsed;
    } catch (err) {
        return err;
    }
}
|
357 |
|
/**
 * Internally exported csv2json function.
 * Runs the CSV through the conversion pipeline and reports the outcome through
 * the callback: `callback(null, json)` on success, `callback(err)` when any
 * step fails.
 * NOTE: because the error handler is chained after the success handler, an
 * exception thrown by the callback itself is passed to the callback again.
 * @param data {String} csv string
 * @param callback {Function} callback function (err, json)
 */
function convert(data, callback) {
    // Steps applied, in order, to the BOM-stripped CSV
    const steps = [
        splitCsvLines, // Split the CSV into lines using the specified EOL option
        retrieveHeading, // Retrieve the headings from the CSV, unless the user specified the keys
        retrieveRecordLines, // Retrieve the record lines from the CSV
        transformRecordLines // Retrieve the JSON document array
    ];

    steps.reduce((pending, step) => pending.then(step), stripExcelBOM(data))
        .then((params) => callback(null, params.json)) // Send the data back to the caller
        .catch(callback);
}
|
376 |
|
377 | return {
|
378 | convert,
|
379 | validationFn: utils.isString,
|
380 | validationMessages: constants.errors.csv2json
|
381 | };
|
382 | };
|
383 |
|
384 | module.exports = { Csv2Json };
|