UNPKG

16.8 kB | JavaScript | View Raw
1'use strict';
2
3let path = require('doc-path'),
4 constants = require('./constants.json'),
5 utils = require('./utils');
6
7const Csv2Json = function(options) {
8 const escapedWrapDelimiterRegex = new RegExp(options.delimiter.wrap + options.delimiter.wrap, 'g'),
9 excelBOMRegex = new RegExp('^' + constants.values.excelBOM);
10
/**
 * Normalizes a raw header cell into the key used for the generated documents.
 * The cell is first passed through removeWrapDelimitersFromValue; then, when
 * `options.trimHeaderFields` is truthy, every dot-separated component of the
 * key is trimmed of surrounding whitespace.
 * @param {String} headerKey raw header cell taken from the first CSV line
 * @returns {String} the processed header key
 */
function processHeaderKey(headerKey) {
    const unwrappedKey = removeWrapDelimitersFromValue(headerKey);

    if (!options.trimHeaderFields) {
        return unwrappedKey;
    }

    // Trim each path component separately so that ' a . b ' becomes 'a.b'
    return unwrappedKey
        .split('.')
        .map((component) => component.trim())
        .join('.');
}
25
/**
 * Builds the params object used by the rest of the conversion pipeline.
 * The first line is treated as the header row: each header cell is processed
 * via processHeaderKey and paired with its column index. When `options.keys`
 * is provided, only the header fields whose value appears in that list are
 * kept, so the original column index of every kept key is preserved.
 * @param {Array.<String[]>} lines CSV lines, each already split into field values
 * @returns {{lines: Array.<String[]>, headerFields: Array.<{value: String, index: Number}>}}
 */
function retrieveHeading(lines) {
    const params = {lines};
    // An empty CSV produces no lines at all; treat that as a header row with
    // no columns instead of throwing on `undefined.map`
    const headerRow = params.lines[0] || [];

    params.headerFields = headerRow.map((headerKey, index) => ({
        value: processHeaderKey(headerKey),
        index
    }));

    // If the user provided keys, narrow the generated keys to just those keys
    if (options.keys) {
        params.headerFields = params.headerFields
            .filter((headerField) => options.keys.includes(headerField.value));
    }

    return params;
}
47
/**
 * Splits the CSV string into lines of field values via splitLines and
 * resolves with the result, so the step can be chained in the promise pipeline.
 * @param {String} csv
 * @returns {Promise} resolves with the value returned by splitLines
 */
function splitCsvLines(csv) {
    const lines = splitLines(csv);
    return Promise.resolve(lines);
}
56
/**
 * Removes the text matched by `excelBOMRegex` (the Excel BOM value anchored
 * to the start of the string) when `options.excelBOM` is truthy; otherwise
 * the CSV is passed through unchanged.
 * @param {String} csv
 * @returns {Promise.<String>} resolves with the (possibly stripped) CSV
 */
function stripExcelBOM(csv) {
    const cleanedCsv = options.excelBOM ? csv.replace(excelBOMRegex, '') : csv;
    return Promise.resolve(cleanedCsv);
}
68
/**
 * Splits a CSV string into lines and each line into its raw field values,
 * taking wrap delimiters into account so that field and EOL delimiters that
 * appear inside a wrapped value do not split it.
 *
 * Values are returned exactly as they appear in the CSV: wrap delimiters and
 * escaped (doubled) wrap delimiters are still present and are handled by later
 * steps. A line that ends with a field delimiter gets an extra trailing empty
 * value. The scanner compares single characters against the field and wrap
 * delimiters, so both are assumed to be one character long; only the EOL
 * delimiter may be longer.
 * @param {String} csv
 * @returns {Array.<String[]>} one array of raw field values per line
 */
function splitLines(csv) {
    const fieldDelimiter = options.delimiter.field;
    const wrapDelimiter = options.delimiter.wrap;
    const eolDelimiter = options.delimiter.eol;
    const eolDelimiterLength = eolDelimiter.length;
    const lastCharacterIndex = csv.length - 1;
    const lines = [];
    const stateVariables = {
        insideWrapDelimiter: false,
        parsingValue: true,
        justParsedDoubleQuote: false,
        startIndex: 0
    };
    let splitLine = [];
    let index = 0;

    // Stores the line collected so far and resets the state for the next line
    const finishLine = (charAfter) => {
        lines.push(splitLine);
        splitLine = [];
        stateVariables.startIndex = index + eolDelimiterLength;
        stateVariables.parsingValue = true;
        stateVariables.insideWrapDelimiter = charAfter === wrapDelimiter;
    };

    // Loop through each character to identify where to split the values
    while (index < csv.length) {
        const character = csv[index];
        const charBefore = index ? csv[index - 1] : '';
        const charAfter = index < lastCharacterIndex ? csv[index + 1] : '';
        // Next n characters, including the current one, where n = length(EOL delimiter).
        // This allows an EOL delimiter of more than one character (eg. '\r\n') to be detected.
        const nextNChar = utils.getNCharacters(csv, index, eolDelimiterLength);

        if ((nextNChar === eolDelimiter && !stateVariables.insideWrapDelimiter ||
            index === lastCharacterIndex) && charBefore === fieldDelimiter) {
            // We reached an EOL delimiter or the end of the csv and the previous character is a field delimiter
            let parsedValue;
            if (nextNChar === eolDelimiter && stateVariables.startIndex === index) {
                // Nothing lies between the field delimiter and the EOL delimiter: empty value
                parsedValue = '';
            } else if (index === lastCharacterIndex && character === fieldDelimiter) {
                // The csv ends with two field delimiters in a row (ie. *,,): the value between
                // them stops before the current delimiter instead of swallowing it
                parsedValue = csv.substring(stateVariables.startIndex, index);
            } else {
                // Otherwise there is a value running up to the end of the csv
                parsedValue = csv.substring(stateVariables.startIndex);
            }
            splitLine.push(parsedValue);

            // A field delimiter right before the end implies one more, empty, trailing value
            splitLine.push('');
            finishLine(charAfter);
        } else if (index === lastCharacterIndex && character === fieldDelimiter &&
            !stateVariables.insideWrapDelimiter) {
            // The csv ends with a single field delimiter outside of wrap delimiters (ie. *, or *",)

            // A wrapped value was already stored when its closing wrap delimiter was seen,
            // so only a value that is still being parsed needs to be stored here
            if (stateVariables.parsingValue) {
                splitLine.push(csv.substring(stateVariables.startIndex, index));
            }
            // The trailing field delimiter is followed by an implied empty value
            splitLine.push('');
            finishLine(charAfter);
        } else if (index === lastCharacterIndex || nextNChar === eolDelimiter &&
            // if we aren't inside wrap delimiters, or we are but the character before was a
            // wrap delimiter that was not part of an escaped pair
            (!stateVariables.insideWrapDelimiter ||
                stateVariables.insideWrapDelimiter && charBefore === wrapDelimiter && !stateVariables.justParsedDoubleQuote)) {
            // We reached the end of the line or of the csv

            // NOTE(review): at the very last character `toIndex` stays undefined unless the previous
            // character is a wrap delimiter, so a single-character EOL that terminates the input ends
            // up inside the final value - confirm whether callers strip a trailing EOL beforehand.
            const toIndex = index !== lastCharacterIndex || charBefore === wrapDelimiter ? index : undefined;

            // Retrieve the remaining value and add it to the split line list of values
            splitLine.push(csv.substring(stateVariables.startIndex, toIndex));
            finishLine(charAfter);
        } else if ((charBefore !== wrapDelimiter || stateVariables.justParsedDoubleQuote && charBefore === wrapDelimiter) &&
            character === wrapDelimiter && utils.getNCharacters(csv, index + 1, eolDelimiterLength) === eolDelimiter) {
            // A closing wrap delimiter directly followed by an EOL delimiter (ie. *"\n)

            stateVariables.insideWrapDelimiter = false;
            stateVariables.parsingValue = false;
            // The next iteration stores the value and finishes the line
        } else if (character === wrapDelimiter && (index === 0 ||
            index >= eolDelimiterLength &&
            utils.getNCharacters(csv, index - eolDelimiterLength, eolDelimiterLength) === eolDelimiter)) {
            // The line starts with a wrap delimiter (ie. "*). The look-behind spans the whole EOL
            // delimiter so that multi-character EOL delimiters (eg. '\r\n') are recognised as well.

            stateVariables.insideWrapDelimiter = true;
            stateVariables.parsingValue = true;
            stateVariables.startIndex = index;
        } else if (character === wrapDelimiter && charAfter === fieldDelimiter) {
            // A wrap delimiter with a field delimiter after it (ie. *",)

            splitLine.push(csv.substring(stateVariables.startIndex, index + 1));
            stateVariables.startIndex = index + 2; // next value starts after the field delimiter
            stateVariables.insideWrapDelimiter = false;
            stateVariables.parsingValue = false;
        } else if (character === wrapDelimiter && charBefore === fieldDelimiter &&
            !stateVariables.insideWrapDelimiter && !stateVariables.parsingValue) {
            // A wrap delimiter right after a field delimiter while no value is being parsed (ie. ",")

            stateVariables.startIndex = index;
            stateVariables.insideWrapDelimiter = true;
            stateVariables.parsingValue = true;
        } else if (character === wrapDelimiter && charBefore === fieldDelimiter &&
            !stateVariables.insideWrapDelimiter && stateVariables.parsingValue) {
            // A wrap delimiter preceded by a field delimiter while a value is being parsed (ie. *,")

            splitLine.push(csv.substring(stateVariables.startIndex, index - 1));
            stateVariables.insideWrapDelimiter = true;
            stateVariables.parsingValue = true;
            stateVariables.startIndex = index;
        } else if (character === wrapDelimiter && charAfter === wrapDelimiter) {
            // An escaped wrap delimiter (ie. ""): skip past the second one

            index += 2;
            stateVariables.justParsedDoubleQuote = true;
            continue;
        } else if (character === fieldDelimiter && charBefore !== wrapDelimiter &&
            charAfter !== wrapDelimiter && !stateVariables.insideWrapDelimiter &&
            stateVariables.parsingValue) {
            // A field delimiter outside of wrap delimiters (ie. *,*)

            splitLine.push(csv.substring(stateVariables.startIndex, index));
            stateVariables.startIndex = index + 1;
        } else if (character === fieldDelimiter && charBefore === wrapDelimiter &&
            charAfter !== wrapDelimiter && !stateVariables.parsingValue) {
            // A field delimiter after a closing wrap delimiter, followed by an unwrapped value (ie. ",*)

            stateVariables.insideWrapDelimiter = false;
            stateVariables.parsingValue = true;
            stateVariables.startIndex = index + 1;
        }
        // Otherwise move on to the next character
        index++;
        // Reset the double quote state variable
        stateVariables.justParsedDoubleQuote = false;
    }

    return lines;
}
209
/**
 * Moves every line after the header line into `params.recordLines`.
 * `splice` removes those lines from `params.lines` in place, so afterwards
 * `params.lines` holds only the header line.
 * @param {Object} params pipeline state holding the split CSV `lines`
 * @returns {Object} the same params object, with `recordLines` set
 */
function retrieveRecordLines(params) {
    const recordLines = params.lines.splice(1);
    params.recordLines = recordLines;

    return params;
}
220
/**
 * Retrieves the value for the record from the line at the provided key and
 * runs it through processRecordValue.
 * @param {String[]} line split line values for the record
 * @param {Object} key {index: Number, value: String}
 * @returns {*} the processed value found at the key's column index
 */
function retrieveRecordValueFromLine(line, key) {
    // When the line has no entry at the key's index the raw value is `undefined`
    // (not null); it is still handed to processRecordValue
    const rawValue = line[key.index];

    // Perform any necessary value conversions on the record value
    return processRecordValue(rawValue);
}
233
/**
 * Converts a raw record value into the value stored on the document.
 * The value is handed to parseValue; any result that `utils.isError` does not
 * flag as an error is used as-is, which turns values like 'null' into null
 * and 'false' into false. When parsing fails, the literal text 'undefined'
 * becomes `undefined` and every other value is returned unchanged.
 * @param {String} fieldValue
 * @returns {*}
 */
function processRecordValue(fieldValue) {
    const parsedValue = parseValue(fieldValue);

    if (!utils.isError(parsedValue)) {
        return parsedValue;
    }

    return fieldValue === 'undefined' ? undefined : fieldValue;
}
253
/**
 * Trims the record value when `options.trimFieldValues` is truthy.
 * Only strings are trimmed. Any other value (null, undefined, numbers,
 * booleans, objects) is returned untouched; this matters because parseValue
 * applies this function to the entries of parsed arrays, which are not
 * necessarily strings.
 * @param {*} fieldValue
 * @returns {*} the trimmed string, or the original value
 */
function trimRecordValue(fieldValue) {
    if (options.trimFieldValues && typeof fieldValue === 'string') {
        return fieldValue.trim();
    }
    return fieldValue;
}
265
/**
 * Creates a JSON document from the given header keys and the values of the
 * given line. For every key the value at the key's column index is retrieved
 * (and converted) via retrieveRecordValueFromLine and written to the document
 * with `path.setPath`, whose return value becomes the document for the next key.
 * @param {Object[]} keys header fields, each {index: Number, value: String}
 * @param {String[]} line split line values for the record
 * @returns {Object} created json document
 */
function createDocument(keys, line) {
    // NOTE(review): key values originate from the CSV header row; confirm that the
    // doc-path version in use rejects prototype-polluting paths such as '__proto__.x'
    return keys.reduce((document, key) => {
        const value = retrieveRecordValueFromLine(line, key);

        return path.setPath(document, key.value, value);
    }, {});
}
283
/**
 * Removes the outermost pair of wrap delimiters from a value, if present.
 * A value counts as wrapped only when it is at least two characters long and
 * both starts and ends with the wrap delimiter; a lone wrap delimiter is
 * therefore returned as is, as is any non-wrapped value.
 * @param {String} fieldValue
 * @returns {String}
 */
function removeWrapDelimitersFromValue(fieldValue) {
    const wrapDelimiter = options.delimiter.wrap;
    const lastIndex = fieldValue.length - 1;
    const isWrapped = lastIndex > 0 &&
        fieldValue[0] === wrapDelimiter &&
        fieldValue[lastIndex] === wrapDelimiter;

    // Drop the first and last character of a wrapped value
    return isWrapped ? fieldValue.slice(1, lastIndex) : fieldValue;
}
300
/**
 * Unescapes wrap delimiters by replacing each doubled delimiter with a single
 * one (eg. "" -> "). This is done in order to parse RFC 4180 compliant CSV
 * back to JSON.
 * The doubled delimiter is matched as literal text rather than through a
 * regular expression built from it, so wrap delimiters that are regular
 * expression metacharacters (eg. | or $) are handled correctly.
 * @param {String} fieldValue
 * @returns {String}
 */
function unescapeWrapDelimiterInField(fieldValue) {
    const wrapDelimiter = options.delimiter.wrap;
    const escapedWrapDelimiter = wrapDelimiter + wrapDelimiter;

    return fieldValue.split(escapedWrapDelimiter).join(wrapDelimiter);
}
310
/**
 * Main helper function that converts the record lines into the JSON document
 * array and stores it on `params.json`.
 * Every field value of a line has its outer wrap delimiters removed, its
 * escaped wrap delimiters unescaped and is trimmed (if configured) before the
 * document is created from `params.headerFields` and the cleaned line.
 * @param {Object} params pipeline state; reads `recordLines` and `headerFields`
 * @returns {Object} the same params object, with `json` set
 */
function transformRecordLines(params) {
    // One document per record line; `map` keeps this linear in the number of
    // lines instead of re-copying the result array for every line
    params.json = params.recordLines.map((line) => {
        const cleanedLine = line.map((fieldValue) => {
            const unwrappedValue = removeWrapDelimitersFromValue(fieldValue);
            const unescapedValue = unescapeWrapDelimiterInField(unwrappedValue);

            return trimRecordValue(unescapedValue);
        });

        return createDocument(params.headerFields, cleanedLine);
    });

    return params;
}
333
/**
 * Attempts to parse the provided value.
 * A value that `utils.isStringRepresentation` accepts and that
 * `utils.isDateRepresentation` does not is returned unchanged. Everything else
 * is run through JSON.parse; parsed arrays additionally get each entry passed
 * through trimRecordValue. Any error thrown along the way is caught and
 * returned (not thrown), so callers must check the result for an error.
 * @param {*} value
 * @returns {*} the parsed value, the original value, or the caught error
 */
function parseValue(value) {
    try {
        if (utils.isStringRepresentation(value, options) && !utils.isDateRepresentation(value)) {
            return value;
        }

        const parsedJson = JSON.parse(value);

        if (!Array.isArray(parsedJson)) {
            return parsedJson;
        }

        // Array entries also need their record values trimmed, if specified;
        // only the entry itself is passed on, not the map index and array
        return parsedJson.map((entry) => trimRecordValue(entry));
    } catch (err) {
        return err;
    }
}
357
/**
 * Internally exported csv2json function.
 * Runs the conversion pipeline on the CSV string and reports the outcome
 * through the callback: `callback(null, json)` on success, `callback(err)`
 * when any step rejects or throws.
 * @param {String} data csv string
 * @param {Function} callback callback function (err, json)
 */
function convert(data, callback) {
    stripExcelBOM(data) // Remove the Excel BOM, if configured
        .then(splitCsvLines) // Split the CSV into lines of field values
        .then(retrieveHeading) // Retrieve the headings from the CSV
        .then(retrieveRecordLines) // Retrieve the record lines from the CSV
        .then(transformRecordLines) // Retrieve the JSON document array
        .then((params) => callback(null, params.json)) // Send the data back to the caller
        // NOTE(review): this handler also catches errors thrown by the success callback
        // itself, in which case the callback is invoked a second time with that error
        .catch(callback);
}
376
377 return {
378 convert,
379 validationFn: utils.isString,
380 validationMessages: constants.errors.csv2json
381 };
382};
383
384module.exports = { Csv2Json };