1 | ;
|
2 |
|
3 | let path = require('doc-path'),
|
4 | deeks = require('deeks'),
|
5 | constants = require('./constants.json'),
|
6 | utils = require('./utils');
|
7 |
|
8 | const Json2Csv = function(options) {
|
// Escape regular-expression metacharacters so the wrap delimiter is always matched
// literally. Unescaped, a delimiter such as '|' or '.' matches unintended text and one
// such as '*' or '(' makes the RegExp constructor throw.
// NOTE(review): assumes options.delimiter.wrap is a string - confirm defaults are applied by the caller.
const escapedWrapDelimiter = options.delimiter.wrap.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
    // Global regex matching every occurrence of the wrap delimiter
    wrapDelimiterCheckRegex = new RegExp(escapedWrapDelimiter, 'g'),
    // Matches a line break: LF, CRLF, or a lone CR
    crlfSearchRegex = /\r?\n|\r/,
    // True only when array objects are to be expanded and arrays are not being unwound
    expandingWithoutUnwinding = options.expandArrayObjects && !options.unwindArrays,
    // Options handed to deeks when the list of keys is generated
    deeksOptions = {
        expandArrayObjects: expandingWithoutUnwinding,
        ignoreEmptyArraysWhenExpanding: expandingWithoutUnwinding
    };
|
16 |
|
17 | /** HEADER FIELD FUNCTIONS **/
|
18 |
|
/**
 * Asks the deeks module for the keys of every document in the provided list,
 * using the deeks options derived from the converter options.
 * @param data {Array<Object>} Documents to be converted
 * @returns {Promise} resolves with whatever deeks.deepKeysFromList returns
 *                    (presumably one list of key paths per document - confirm against deeks)
 */
function getFieldNameList(data) {
    // No explicit keys are used here; the key list is generated by deeks
    const generatedKeys = deeks.deepKeysFromList(data, deeksOptions);

    return Promise.resolve(generatedKeys);
}
|
28 |
|
/**
 * Turns the per-document schemas into the list of field names to use.
 * When the checkSchemaDifferences option is set, the schemas are verified
 * against each other; otherwise the schemas are flattened and de-duplicated.
 * @param documentSchemas list of schemas (key lists), one per document
 * @returns {Promise} resolves with the field names, or rejects via the schema check
 */
function processSchemas(documentSchemas) {
    if (!options.checkSchemaDifferences) {
        // Differences between schemas are irrelevant, so use the unique set of all keys
        return Promise.resolve(utils.unique(utils.flatten(documentSchemas)));
    }

    // The user wants every document to share one schema (regardless of key ordering)
    return checkSchemaDifferences(documentSchemas);
}
|
46 |
|
/**
 * Performs the schema difference check: the first schema is compared against
 * all remaining schemas. With no remaining schemas there is nothing to differ.
 * @param documentSchemas list of schemas (key lists), one per document
 * @returns {Promise} resolves with the first schema, or rejects with a
 *                    "not same schema" Error when any schema differs
 */
function checkSchemaDifferences(documentSchemas) {
    // Split into the reference schema and the schemas to compare against it
    const [firstDocSchema, ...restOfDocumentSchemas] = documentSchemas;
    const differingSchemaCount = computeNumberOfSchemaDifferences(firstDocSchema, restOfDocumentSchemas);

    // Any inconsistency at all means the documents do not share a single schema
    return differingSchemaCount
        ? Promise.reject(new Error(constants.errors.json2csv.notSameSchema))
        : Promise.resolve(firstDocSchema);
}
|
67 |
|
/**
 * Counts how many of the remaining schemas differ from the first schema.
 * A schema counts once, no matter how many individual differences it has.
 * @param firstDocSchema reference schema
 * @param restOfDocumentSchemas {Array} schemas compared against the reference
 * @returns {Number} number of schemas that are not the same as the reference
 */
function computeNumberOfSchemaDifferences(firstDocSchema, restOfDocumentSchemas) {
    const differsFromFirst = (documentSchema) =>
        utils.computeSchemaDifferences(firstDocSchema, documentSchema).length > 0;

    return restOfDocumentSchemas.filter((documentSchema) => differsFromFirst(documentSchema)).length;
}
|
83 |
|
/**
 * Sorts the header field names alphabetically when the sortHeader option is set.
 * @param fieldNames {Array<String>}
 * @returns {Array<String>} a sorted copy of the field names, or the original
 *                          array untouched if sorting was not requested
 */
function sortHeaderFields(fieldNames) {
    if (options.sortHeader) {
        // Array.prototype.sort sorts in place; sort a shallow copy so that the caller's
        // array (which may be the user-supplied options.keys) is never reordered.
        return fieldNames.slice().sort();
    }
    return fieldNames;
}
|
95 |
|
/**
 * Trims whitespace from the header fields when the trimHeaderFields option is set.
 * Each dot-separated component of a field is trimmed individually.
 * @param params {Object} conversion state; params.headerFields is replaced when trimming
 * @returns {Object} the same params object
 */
function trimHeaderFields(params) {
    if (!options.trimHeaderFields) {
        return params;
    }

    // Trim every component of a dotted key path, then rebuild the path
    const trimPathComponents = (field) => field
        .split('.')
        .map((component) => component.trim())
        .join('.');

    params.headerFields = params.headerFields.map((field) => trimPathComponents(field));
    return params;
}
|
110 |
|
/**
 * Wraps the header fields where necessary, but only when the header is
 * actually going to be prepended to the output.
 * @param params {Object} conversion state; params.headerFields is replaced when wrapping
 * @returns {Object} the same params object
 */
function wrapHeaderFields(params) {
    if (!options.prependHeader) {
        // The header is not emitted, so there is nothing to wrap
        return params;
    }

    params.headerFields = params.headerFields.map((headingKey) => wrapFieldValueIfNecessary(headingKey));
    return params;
}
|
125 |
|
/**
 * Builds the CSV header line by joining the header fields with the field
 * delimiter and stores it on params.header.
 * @param params {Object} conversion state; reads headerFields, writes header
 * @returns {Object} the same params object
 */
function generateCsvHeader(params) {
    const fieldDelimiter = options.delimiter.field;

    params.header = params.headerFields.join(fieldDelimiter);
    return params;
}
|
135 |
|
/**
 * Determines the header fields for the given documents.
 * If keys were provided and arrays are not being unwound, the provided keys
 * are used directly. Otherwise the keys are generated from the data and run
 * through the schema processing. Either way the result is passed through the
 * optional header sort.
 * @param data {Array<Object>} documents being converted
 * @returns {Promise} resolves with the header field names
 */
function retrieveHeaderFields(data) {
    const useProvidedKeys = options.keys && !options.unwindArrays;

    const fieldNames = useProvidedKeys
        ? Promise.resolve(options.keys)
        : getFieldNameList(data).then(processSchemas);

    return fieldNames.then(sortHeaderFields);
}
|
152 |
|
153 | /** RECORD FIELD FUNCTIONS **/
|
154 |
|
/**
 * Unwinds arrays within the records when the unwindArrays option is set.
 * Every header field is unwound in turn, the header fields are then
 * recomputed from the unwound records, and the process repeats for as long
 * as unwinding changed the number of records. When the option is not set,
 * params is passed straight through to the next step of the promise chain.
 * @param params {Object} conversion state; records and headerFields are updated
 * @returns {Object|Promise<Object>} params, or a promise resolving with params
 */
function unwindRecordsIfNecessary(params) {
    if (!options.unwindArrays) {
        return params;
    }

    const recordCountBeforeUnwinding = params.records.length;

    // Unwind the documents at each of the current header fields
    params.headerFields.forEach((headerField) => {
        params.records = utils.unwind(params.records, headerField);
    });

    return retrieveHeaderFields(params.records).then((refreshedHeaderFields) => {
        params.headerFields = refreshedHeaderFields;

        // A changed record count means arrays were unwound, so try another pass
        const unwoundAdditionalRecords = recordCountBeforeUnwinding !== params.records.length;
        if (unwoundAdditionalRecords) {
            return unwindRecordsIfNecessary(params);
        }

        // Nothing further was unwound; explicitly provided keys take precedence as the header
        if (options.keys) {
            params.headerFields = options.keys;
        }
        return params;
    });
}
|
190 |
|
/**
 * Converts every record into its CSV line and joins the lines with the EOL
 * delimiter, replacing params.records with the resulting string.
 * Order of operations for each record:
 *   - Retrieve the values for the header fields from the record
 *   - Trim each value
 *   - Convert each value to its string representation
 *   - Wrap each value if necessary
 *   - Join the values of the record with the field delimiter
 * @param params {Object} conversion state; reads headerFields, rewrites records
 * @returns {Object} the same params object
 */
function processRecords(params) {
    // Full per-value pipeline: trim, stringify, then wrap
    const toCsvFieldValue = (fieldValue) =>
        wrapFieldValueIfNecessary(recordFieldValueToString(trimRecordFieldValue(fieldValue)));

    const csvRows = params.records.map((record) => {
        const recordFieldData = retrieveRecordFieldData(record, params.headerFields);
        const processedRecordData = recordFieldData.map((fieldValue) => toCsvFieldValue(fieldValue));

        return generateCsvRowFromRecord(processedRecordData);
    });

    params.records = csvRows.join(options.delimiter.eol);
    return params;
}
|
224 |
|
/**
 * Helper intended to process *just* array values when the expandArrayObjects option is set.
 *  - An empty array, or an array holding only empty values, becomes the empty
 *    field value (options.emptyFieldValue when defined, otherwise '').
 *  - An array with exactly one non-empty value becomes that value.
 *  - Any other array is returned unchanged.
 * @param recordFieldValue {Array} array value retrieved from a record
 * @returns {*} processed array value
 */
function processRecordFieldDataForExpandedArrayObject(recordFieldValue) {
    const filteredRecordFieldValue = utils.removeEmptyFields(recordFieldValue);

    // Either empty, or full of empty values: use the empty value representation
    if (!recordFieldValue.length || !filteredRecordFieldValue.length) {
        // Only fall back to '' when no emptyFieldValue was configured; a defined but
        // falsy value (0, false, null) must be honoured, matching the isUndefined
        // check used when empty fields are replaced in retrieveRecordFieldData.
        return utils.isUndefined(options.emptyFieldValue) ? '' : options.emptyFieldValue;
    }

    if (filteredRecordFieldValue.length === 1) {
        // Since array objects are being expanded, key in on the single actual value
        return filteredRecordFieldValue[0];
    }

    return recordFieldValue;
}
|
244 |
|
/**
 * Collects the value of each of the given fields from a single record.
 * Empty values are replaced with options.emptyFieldValue when that option is
 * defined; otherwise array values receive the expanded-array treatment when
 * the expandArrayObjects option is set.
 * @param record {Object} document to read from
 * @param fields {Array<String>} key paths to evaluate against the record
 * @returns {Array} one value per field, in field order
 */
function retrieveRecordFieldData(record, fields) {
    return fields.map((field) => {
        const recordFieldValue = path.evaluatePath(record, field);
        const hasEmptyFieldReplacement = !utils.isUndefined(options.emptyFieldValue);

        if (hasEmptyFieldReplacement && utils.isEmptyField(recordFieldValue)) {
            return options.emptyFieldValue;
        }

        if (options.expandArrayObjects && Array.isArray(recordFieldValue)) {
            return processRecordFieldDataForExpandedArrayObject(recordFieldValue);
        }

        return recordFieldValue;
    });
}
|
268 |
|
/**
 * Produces the string representation of a record field value.
 * Arrays and non-date objects are serialized as JSON, undefined and null
 * become the strings 'undefined' and 'null', and anything else is converted
 * with its own toString method.
 * @param fieldValue {*} value to convert
 * @returns {String}
 */
function recordFieldValueToString(fieldValue) {
    const needsJsonSerialization = Array.isArray(fieldValue) ||
        (utils.isObject(fieldValue) && !utils.isDate(fieldValue));

    if (needsJsonSerialization) {
        return JSON.stringify(fieldValue);
    }
    if (utils.isUndefined(fieldValue)) {
        return 'undefined';
    }
    if (utils.isNull(fieldValue)) {
        return 'null';
    }
    return fieldValue.toString();
}
|
285 |
|
/**
 * Trims a record field value when the trimFieldValues option is set.
 * Strings are trimmed, arrays are trimmed element by element (recursively),
 * and every other value is returned as is.
 * @param fieldValue {*} value to trim
 * @returns {*} the trimmed value, or the original value when nothing applies
 */
function trimRecordFieldValue(fieldValue) {
    if (!options.trimFieldValues) {
        return fieldValue;
    }

    if (Array.isArray(fieldValue)) {
        return fieldValue.map(trimRecordFieldValue);
    }

    return utils.isString(fieldValue) ? fieldValue.trim() : fieldValue;
}
|
302 |
|
/**
 * Escapes wrap delimiters (quotation marks by default) in the field value by
 * doubling them, then wraps the value in the wrap delimiter if it contains the
 * field delimiter, the wrap delimiter, or a line break (LF, CRLF or CR).
 * @param fieldValue {String} string representation of a field value
 * @returns {String} the escaped and, where needed, wrapped field value
 */
function wrapFieldValueIfNecessary(fieldValue) {
    const wrapDelimiter = options.delimiter.wrap;

    // eg. includes quotation marks (default delimiter)
    if (fieldValue.includes(wrapDelimiter)) {
        // Double every wrap delimiter. split/join replaces literally: the delimiter is not
        // interpreted as a regular expression (so '|' or '.' work) and the replacement is
        // not scanned for '$' patterns (so a '$' delimiter really is doubled).
        fieldValue = fieldValue.split(wrapDelimiter).join(wrapDelimiter + wrapDelimiter);
    }

    // If the field contains the field delimiter, the wrap delimiter, or a line break,
    // enclose it in the wrap delimiter
    if (fieldValue.includes(options.delimiter.field) ||
        fieldValue.includes(wrapDelimiter) ||
        crlfSearchRegex.test(fieldValue)) {
        fieldValue = wrapDelimiter + fieldValue + wrapDelimiter;
    }

    return fieldValue;
}
|
329 |
|
/**
 * Builds a single CSV row by joining the already processed field values of a
 * record with the field delimiter.
 * @param recordFieldValues {Array<String>} processed values of one record
 * @returns {String} the CSV row
 */
function generateCsvRowFromRecord(recordFieldValues) {
    const fieldDelimiter = options.delimiter.field;

    return recordFieldValues.join(fieldDelimiter);
}
|
337 |
|
338 | /** CSV COMPONENT COMBINER/FINAL PROCESSOR **/
|
/**
 * Assembles the final CSV from its components and hands it to the callback.
 * The pieces, in order: the Excel BOM (when the excelBOM option is set), the
 * header followed by an EOL (when the prependHeader option is set), and the
 * records.
 * @param params {Object} conversion state; reads header, records and callback
 * @returns {*} whatever the callback returns
 */
function generateCsvFromComponents(params) {
    const bomPrefix = options.excelBOM ? constants.values.excelBOM : '';

    // The header is only emitted, along with its EOL, when it is to be prepended
    const headerLine = options.prependHeader ? params.header + options.delimiter.eol : '';

    return params.callback(null, bomPrefix + headerLine + params.records);
}
|
356 |
|
357 | /** MAIN CONVERTER FUNCTION **/
|
358 |
|
/**
 * Internally exported json2csv function.
 * Accepts a single document or an array of documents and reports the
 * generated CSV, or any error raised along the way, through the callback.
 * @param data {Object|Array<Object>} documents to be converted to csv
 * @param callback {Function} invoked as callback(null, csv) or callback(error)
 */
function convert(data, callback) {
    // A lone document (an object without a truthy length) is treated as a list of one
    const isSingleDocument = utils.isObject(data) && !data.length;
    const documents = isSingleDocument ? [data] : data;

    // Initial state threaded through every step of the conversion pipeline
    const buildConversionParams = (headerFields) => ({
        headerFields,
        callback,
        records: documents
    });

    // Retrieve the heading, then generate the CSV with the keys that were identified
    retrieveHeaderFields(documents)
        .then(buildConversionParams)
        .then(unwindRecordsIfNecessary)
        .then(processRecords)
        .then(wrapHeaderFields)
        .then(trimHeaderFields)
        .then(generateCsvHeader)
        .then(generateCsvFromComponents)
        .catch(callback);
}
|
386 |
|
// Object returned to the caller of Json2Csv: the convert function defined above,
// utils.isObject as the validation function, and the json2csv error messages
// taken from constants.json.
// NOTE(review): validationFn and validationMessages are presumably used by the
// calling module to validate input before convert is invoked - confirm there.
return {
    convert,
    validationFn: utils.isObject,
    validationMessages: constants.errors.json2csv
};
|
392 | };
|
393 |
|
// Expose the Json2Csv factory as a named property of this module's exports
module.exports = { Json2Csv };
|