'use strict'; var stream = require('stream'); const is_object = function (obj) { return typeof obj === "object" && obj !== null && !Array.isArray(obj); }; class CsvError extends Error { constructor(code, message, options, ...contexts) { if (Array.isArray(message)) message = message.join(" ").trim(); super(message); if (Error.captureStackTrace !== undefined) { Error.captureStackTrace(this, CsvError); } this.code = code; for (const context of contexts) { for (const key in context) { const value = context[key]; this[key] = Buffer.isBuffer(value) ? value.toString(options.encoding) : value == null ? value : JSON.parse(JSON.stringify(value)); } } } } const normalize_columns_array = function (columns) { const normalizedColumns = []; for (let i = 0, l = columns.length; i < l; i++) { const column = columns[i]; if (column === undefined || column === null || column === false) { normalizedColumns[i] = { disabled: true }; } else if (typeof column === "string") { normalizedColumns[i] = { name: column }; } else if (is_object(column)) { if (typeof column.name !== "string") { throw new CsvError("CSV_OPTION_COLUMNS_MISSING_NAME", [ "Option columns missing name:", `property "name" is required at position ${i}`, "when column is an object literal", ]); } normalizedColumns[i] = column; } else { throw new CsvError("CSV_INVALID_COLUMN_DEFINITION", [ "Invalid column definition:", "expect a string or a literal object,", `got ${JSON.stringify(column)} at position ${i}`, ]); } } return normalizedColumns; }; class ResizeableBuffer { constructor(size = 100) { this.size = size; this.length = 0; this.buf = Buffer.allocUnsafe(size); } prepend(val) { if (Buffer.isBuffer(val)) { const length = this.length + val.length; if (length >= this.size) { this.resize(); if (length >= this.size) { throw Error("INVALID_BUFFER_STATE"); } } const buf = this.buf; this.buf = Buffer.allocUnsafe(this.size); val.copy(this.buf, 0); buf.copy(this.buf, val.length); this.length += val.length; } else { const length = this.length++; if (length === this.size) { this.resize(); } const buf = this.clone(); this.buf[0] = val; buf.copy(this.buf, 1, 0, length); } } append(val) { const length = this.length++; if (length === this.size) { this.resize(); } this.buf[length] = val; } clone() { return Buffer.from(this.buf.slice(0, this.length)); } resize() { const length = this.length; this.size = this.size * 2; const buf = Buffer.allocUnsafe(this.size); this.buf.copy(buf, 0, 0, length); this.buf = buf; } toString(encoding) { if (encoding) { return this.buf.slice(0, this.length).toString(encoding); } else { return Uint8Array.prototype.slice.call(this.buf.slice(0, this.length)); } } toJSON() { return this.toString("utf8"); } reset() { this.length = 0; } } // white space characters // https://en.wikipedia.org/wiki/Whitespace_character // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#Types // \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff const np = 12; const cr$1 = 13; // `\r`, carriage return, 0x0D in hexadécimal, 13 in decimal const nl$1 = 10; // `\n`, newline, 0x0A in hexadecimal, 10 in decimal const space = 32; const tab = 9; const init_state = function (options) { return { bomSkipped: false, bufBytesStart: 0, castField: options.cast_function, commenting: false, // Current error encountered by a record error: undefined, enabled: options.from_line === 1, escaping: false, escapeIsQuote: Buffer.isBuffer(options.escape) && Buffer.isBuffer(options.quote) && Buffer.compare(options.escape, options.quote) === 0, // columns can be `false`, `true`, `Array` expectedRecordLength: Array.isArray(options.columns) ? options.columns.length : undefined, field: new ResizeableBuffer(20), firstLineToHeaders: options.cast_first_line_to_header, needMoreDataSize: Math.max( // Skip if the remaining buffer smaller than comment options.comment !== null ? options.comment.length : 0, // Skip if the remaining buffer can be delimiter ...options.delimiter.map((delimiter) => delimiter.length), // Skip if the remaining buffer can be escape sequence options.quote !== null ? options.quote.length : 0, ), previousBuf: undefined, quoting: false, stop: false, rawBuffer: new ResizeableBuffer(100), record: [], recordHasError: false, record_length: 0, recordDelimiterMaxLength: options.record_delimiter.length === 0 ? 0 : Math.max(...options.record_delimiter.map((v) => v.length)), trimChars: [ Buffer.from(" ", options.encoding)[0], Buffer.from("\t", options.encoding)[0], ], wasQuoting: false, wasRowDelimiter: false, timchars: [ Buffer.from(Buffer.from([cr$1], "utf8").toString(), options.encoding), Buffer.from(Buffer.from([nl$1], "utf8").toString(), options.encoding), Buffer.from(Buffer.from([np], "utf8").toString(), options.encoding), Buffer.from(Buffer.from([space], "utf8").toString(), options.encoding), Buffer.from(Buffer.from([tab], "utf8").toString(), options.encoding), ], }; }; const underscore = function (str) { return str.replace(/([A-Z])/g, function (_, match) { return "_" + match.toLowerCase(); }); }; const normalize_options = function (opts) { const options = {}; // Merge with user options for (const opt in opts) { options[underscore(opt)] = opts[opt]; } // Normalize option `encoding` // Note: defined first because other options depends on it // to convert chars/strings into buffers. if (options.encoding === undefined || options.encoding === true) { options.encoding = "utf8"; } else if (options.encoding === null || options.encoding === false) { options.encoding = null; } else if ( typeof options.encoding !== "string" && options.encoding !== null ) { throw new CsvError( "CSV_INVALID_OPTION_ENCODING", [ "Invalid option encoding:", "encoding must be a string or null to return a buffer,", `got ${JSON.stringify(options.encoding)}`, ], options, ); } // Normalize option `bom` if ( options.bom === undefined || options.bom === null || options.bom === false ) { options.bom = false; } else if (options.bom !== true) { throw new CsvError( "CSV_INVALID_OPTION_BOM", [ "Invalid option bom:", "bom must be true,", `got ${JSON.stringify(options.bom)}`, ], options, ); } // Normalize option `cast` options.cast_function = null; if ( options.cast === undefined || options.cast === null || options.cast === false || options.cast === "" ) { options.cast = undefined; } else if (typeof options.cast === "function") { options.cast_function = options.cast; options.cast = true; } else if (options.cast !== true) { throw new CsvError( "CSV_INVALID_OPTION_CAST", [ "Invalid option cast:", "cast must be true or a function,", `got ${JSON.stringify(options.cast)}`, ], options, ); } // Normalize option `cast_date` if ( options.cast_date === undefined || options.cast_date === null || options.cast_date === false || options.cast_date === "" ) { options.cast_date = false; } else if (options.cast_date === true) { options.cast_date = function (value) { const date = Date.parse(value); return !isNaN(date) ? new Date(date) : value; }; } else if (typeof options.cast_date !== "function") { throw new CsvError( "CSV_INVALID_OPTION_CAST_DATE", [ "Invalid option cast_date:", "cast_date must be true or a function,", `got ${JSON.stringify(options.cast_date)}`, ], options, ); } // Normalize option `columns` options.cast_first_line_to_header = null; if (options.columns === true) { // Fields in the first line are converted as-is to columns options.cast_first_line_to_header = undefined; } else if (typeof options.columns === "function") { options.cast_first_line_to_header = options.columns; options.columns = true; } else if (Array.isArray(options.columns)) { options.columns = normalize_columns_array(options.columns); } else if ( options.columns === undefined || options.columns === null || options.columns === false ) { options.columns = false; } else { throw new CsvError( "CSV_INVALID_OPTION_COLUMNS", [ "Invalid option columns:", "expect an array, a function or true,", `got ${JSON.stringify(options.columns)}`, ], options, ); } // Normalize option `group_columns_by_name` if ( options.group_columns_by_name === undefined || options.group_columns_by_name === null || options.group_columns_by_name === false ) { options.group_columns_by_name = false; } else if (options.group_columns_by_name !== true) { throw new CsvError( "CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME", [ "Invalid option group_columns_by_name:", "expect an boolean,", `got ${JSON.stringify(options.group_columns_by_name)}`, ], options, ); } else if (options.columns === false) { throw new CsvError( "CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME", [ "Invalid option group_columns_by_name:", "the `columns` mode must be activated.", ], options, ); } // Normalize option `comment` if ( options.comment === undefined || options.comment === null || options.comment === false || options.comment === "" ) { options.comment = null; } else { if (typeof options.comment === "string") { options.comment = Buffer.from(options.comment, options.encoding); } if (!Buffer.isBuffer(options.comment)) { throw new CsvError( "CSV_INVALID_OPTION_COMMENT", [ "Invalid option comment:", "comment must be a buffer or a string,", `got ${JSON.stringify(options.comment)}`, ], options, ); } } // Normalize option `comment_no_infix` if ( options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false ) { options.comment_no_infix = false; } else if (options.comment_no_infix !== true) { throw new CsvError( "CSV_INVALID_OPTION_COMMENT", [ "Invalid option comment_no_infix:", "value must be a boolean,", `got ${JSON.stringify(options.comment_no_infix)}`, ], options, ); } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if (!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; if (options.delimiter.length === 0) { throw new CsvError( "CSV_INVALID_OPTION_DELIMITER", [ "Invalid option delimiter:", "delimiter must be a non empty string or buffer or array of string|buffer,", `got ${delimiter_json}`, ], options, ); } options.delimiter = options.delimiter.map(function (delimiter) { if (delimiter === undefined || delimiter === null || delimiter === false) { return Buffer.from(",", options.encoding); } if (typeof delimiter === "string") { delimiter = Buffer.from(delimiter, options.encoding); } if (!Buffer.isBuffer(delimiter) || delimiter.length === 0) { throw new CsvError( "CSV_INVALID_OPTION_DELIMITER", [ "Invalid option delimiter:", "delimiter must be a non empty string or buffer or array of string|buffer,", `got ${delimiter_json}`, ], options, ); } return delimiter; }); // Normalize option `escape` if (options.escape === undefined || options.escape === true) { options.escape = Buffer.from('"', options.encoding); } else if (typeof options.escape === "string") { options.escape = Buffer.from(options.escape, options.encoding); } else if (options.escape === null || options.escape === false) { options.escape = null; } if (options.escape !== null) { if (!Buffer.isBuffer(options.escape)) { throw new Error( `Invalid Option: escape must be a buffer, a string or a boolean, got ${JSON.stringify(options.escape)}`, ); } } // Normalize option `from` if (options.from === undefined || options.from === null) { options.from = 1; } else { if (typeof options.from === "string" && /\d+/.test(options.from)) { options.from = parseInt(options.from); } if (Number.isInteger(options.from)) { if (options.from < 0) { throw new Error( `Invalid Option: from must be a positive integer, got ${JSON.stringify(opts.from)}`, ); } } else { throw new Error( `Invalid Option: from must be an integer, got ${JSON.stringify(options.from)}`, ); } } // Normalize option `from_line` if (options.from_line === undefined || options.from_line === null) { options.from_line = 1; } else { if ( typeof options.from_line === "string" && /\d+/.test(options.from_line) ) { options.from_line = parseInt(options.from_line); } if (Number.isInteger(options.from_line)) { if (options.from_line <= 0) { throw new Error( `Invalid Option: from_line must be a positive integer greater than 0, got ${JSON.stringify(opts.from_line)}`, ); } } else { throw new Error( `Invalid Option: from_line must be an integer, got ${JSON.stringify(opts.from_line)}`, ); } } // Normalize options `ignore_last_delimiters` if ( options.ignore_last_delimiters === undefined || options.ignore_last_delimiters === null ) { options.ignore_last_delimiters = false; } else if (typeof options.ignore_last_delimiters === "number") { options.ignore_last_delimiters = Math.floor(options.ignore_last_delimiters); if (options.ignore_last_delimiters === 0) { options.ignore_last_delimiters = false; } } else if (typeof options.ignore_last_delimiters !== "boolean") { throw new CsvError( "CSV_INVALID_OPTION_IGNORE_LAST_DELIMITERS", [ "Invalid option `ignore_last_delimiters`:", "the value must be a boolean value or an integer,", `got ${JSON.stringify(options.ignore_last_delimiters)}`, ], options, ); } if (options.ignore_last_delimiters === true && options.columns === false) { throw new CsvError( "CSV_IGNORE_LAST_DELIMITERS_REQUIRES_COLUMNS", [ "The option `ignore_last_delimiters`", "requires the activation of the `columns` option", ], options, ); } // Normalize option `info` if ( options.info === undefined || options.info === null || options.info === false ) { options.info = false; } else if (options.info !== true) { throw new Error( `Invalid Option: info must be true, got ${JSON.stringify(options.info)}`, ); } // Normalize option `max_record_size` if ( options.max_record_size === undefined || options.max_record_size === null || options.max_record_size === false ) { options.max_record_size = 0; } else if ( Number.isInteger(options.max_record_size) && options.max_record_size >= 0 ) ; else if ( typeof options.max_record_size === "string" && /\d+/.test(options.max_record_size) ) { options.max_record_size = parseInt(options.max_record_size); } else { throw new Error( `Invalid Option: max_record_size must be a positive integer, got ${JSON.stringify(options.max_record_size)}`, ); } // Normalize option `objname` if ( options.objname === undefined || options.objname === null || options.objname === false ) { options.objname = undefined; } else if (Buffer.isBuffer(options.objname)) { if (options.objname.length === 0) { throw new Error(`Invalid Option: objname must be a non empty buffer`); } if (options.encoding === null) ; else { options.objname = options.objname.toString(options.encoding); } } else if (typeof options.objname === "string") { if (options.objname.length === 0) { throw new Error(`Invalid Option: objname must be a non empty string`); } // Great, nothing to do } else if (typeof options.objname === "number") ; else { throw new Error( `Invalid Option: objname must be a string or a buffer, got ${options.objname}`, ); } if (options.objname !== undefined) { if (typeof options.objname === "number") { if (options.columns !== false) { throw Error( "Invalid Option: objname index cannot be combined with columns or be defined as a field", ); } } else { // A string or a buffer if (options.columns === false) { throw Error( "Invalid Option: objname field must be combined with columns or be defined as an index", ); } } } // Normalize option `on_record` if (options.on_record === undefined || options.on_record === null) { options.on_record = undefined; } else if (typeof options.on_record !== "function") { throw new CsvError( "CSV_INVALID_OPTION_ON_RECORD", [ "Invalid option `on_record`:", "expect a function,", `got ${JSON.stringify(options.on_record)}`, ], options, ); } // Normalize option `on_skip` // options.on_skip ??= (err, chunk) => { // this.emit('skip', err, chunk); // }; if ( options.on_skip !== undefined && options.on_skip !== null && typeof options.on_skip !== "function" ) { throw new Error( `Invalid Option: on_skip must be a function, got ${JSON.stringify(options.on_skip)}`, ); } // Normalize option `quote` if ( options.quote === null || options.quote === false || options.quote === "" ) { options.quote = null; } else { if (options.quote === undefined || options.quote === true) { options.quote = Buffer.from('"', options.encoding); } else if (typeof options.quote === "string") { options.quote = Buffer.from(options.quote, options.encoding); } if (!Buffer.isBuffer(options.quote)) { throw new Error( `Invalid Option: quote must be a buffer or a string, got ${JSON.stringify(options.quote)}`, ); } } // Normalize option `raw` if ( options.raw === undefined || options.raw === null || options.raw === false ) { options.raw = false; } else if (options.raw !== true) { throw new Error( `Invalid Option: raw must be true, got ${JSON.stringify(options.raw)}`, ); } // Normalize option `record_delimiter` if (options.record_delimiter === undefined) { options.record_delimiter = []; } else if ( typeof options.record_delimiter === "string" || Buffer.isBuffer(options.record_delimiter) ) { if (options.record_delimiter.length === 0) { throw new CsvError( "CSV_INVALID_OPTION_RECORD_DELIMITER", [ "Invalid option `record_delimiter`:", "value must be a non empty string or buffer,", `got ${JSON.stringify(options.record_delimiter)}`, ], options, ); } options.record_delimiter = [options.record_delimiter]; } else if (!Array.isArray(options.record_delimiter)) { throw new CsvError( "CSV_INVALID_OPTION_RECORD_DELIMITER", [ "Invalid option `record_delimiter`:", "value must be a string, a buffer or array of string|buffer,", `got ${JSON.stringify(options.record_delimiter)}`, ], options, ); } options.record_delimiter = options.record_delimiter.map(function (rd, i) { if (typeof rd !== "string" && !Buffer.isBuffer(rd)) { throw new CsvError( "CSV_INVALID_OPTION_RECORD_DELIMITER", [ "Invalid option `record_delimiter`:", "value must be a string, a buffer or array of string|buffer", `at index ${i},`, `got ${JSON.stringify(rd)}`, ], options, ); } else if (rd.length === 0) { throw new CsvError( "CSV_INVALID_OPTION_RECORD_DELIMITER", [ "Invalid option `record_delimiter`:", "value must be a non empty string or buffer", `at index ${i},`, `got ${JSON.stringify(rd)}`, ], options, ); } if (typeof rd === "string") { rd = Buffer.from(rd, options.encoding); } return rd; }); // Normalize option `relax_column_count` if (typeof options.relax_column_count === "boolean") ; else if ( options.relax_column_count === undefined || options.relax_column_count === null ) { options.relax_column_count = false; } else { throw new Error( `Invalid Option: relax_column_count must be a boolean, got ${JSON.stringify(options.relax_column_count)}`, ); } if (typeof options.relax_column_count_less === "boolean") ; else if ( options.relax_column_count_less === undefined || options.relax_column_count_less === null ) { options.relax_column_count_less = false; } else { throw new Error( `Invalid Option: relax_column_count_less must be a boolean, got ${JSON.stringify(options.relax_column_count_less)}`, ); } if (typeof options.relax_column_count_more === "boolean") ; else if ( options.relax_column_count_more === undefined || options.relax_column_count_more === null ) { options.relax_column_count_more = false; } else { throw new Error( `Invalid Option: relax_column_count_more must be a boolean, got ${JSON.stringify(options.relax_column_count_more)}`, ); } // Normalize option `relax_quotes` if (typeof options.relax_quotes === "boolean") ; else if ( options.relax_quotes === undefined || options.relax_quotes === null ) { options.relax_quotes = false; } else { throw new Error( `Invalid Option: relax_quotes must be a boolean, got ${JSON.stringify(options.relax_quotes)}`, ); } // Normalize option `skip_empty_lines` if (typeof options.skip_empty_lines === "boolean") ; else if ( options.skip_empty_lines === undefined || options.skip_empty_lines === null ) { options.skip_empty_lines = false; } else { throw new Error( `Invalid Option: skip_empty_lines must be a boolean, got ${JSON.stringify(options.skip_empty_lines)}`, ); } // Normalize option `skip_records_with_empty_values` if (typeof options.skip_records_with_empty_values === "boolean") ; else if ( options.skip_records_with_empty_values === undefined || options.skip_records_with_empty_values === null ) { options.skip_records_with_empty_values = false; } else { throw new Error( `Invalid Option: skip_records_with_empty_values must be a boolean, got ${JSON.stringify(options.skip_records_with_empty_values)}`, ); } // Normalize option `skip_records_with_error` if (typeof options.skip_records_with_error === "boolean") ; else if ( options.skip_records_with_error === undefined || options.skip_records_with_error === null ) { options.skip_records_with_error = false; } else { throw new Error( `Invalid Option: skip_records_with_error must be a boolean, got ${JSON.stringify(options.skip_records_with_error)}`, ); } // Normalize option `rtrim` if ( options.rtrim === undefined || options.rtrim === null || options.rtrim === false ) { options.rtrim = false; } else if (options.rtrim !== true) { throw new Error( `Invalid Option: rtrim must be a boolean, got ${JSON.stringify(options.rtrim)}`, ); } // Normalize option `ltrim` if ( options.ltrim === undefined || options.ltrim === null || options.ltrim === false ) { options.ltrim = false; } else if (options.ltrim !== true) { throw new Error( `Invalid Option: ltrim must be a boolean, got ${JSON.stringify(options.ltrim)}`, ); } // Normalize option `trim` if ( options.trim === undefined || options.trim === null || options.trim === false ) { options.trim = false; } else if (options.trim !== true) { throw new Error( `Invalid Option: trim must be a boolean, got ${JSON.stringify(options.trim)}`, ); } // Normalize options `trim`, `ltrim` and `rtrim` if (options.trim === true && opts.ltrim !== false) { options.ltrim = true; } else if (options.ltrim !== true) { options.ltrim = false; } if (options.trim === true && opts.rtrim !== false) { options.rtrim = true; } else if (options.rtrim !== true) { options.rtrim = false; } // Normalize option `to` if (options.to === undefined || options.to === null) { options.to = -1; } else { if (typeof options.to === "string" && /\d+/.test(options.to)) { options.to = parseInt(options.to); } if (Number.isInteger(options.to)) { if (options.to <= 0) { throw new Error( `Invalid Option: to must be a positive integer greater than 0, got ${JSON.stringify(opts.to)}`, ); } } else { throw new Error( `Invalid Option: to must be an integer, got ${JSON.stringify(opts.to)}`, ); } } // Normalize option `to_line` if (options.to_line === undefined || options.to_line === null) { options.to_line = -1; } else { if (typeof options.to_line === "string" && /\d+/.test(options.to_line)) { options.to_line = parseInt(options.to_line); } if (Number.isInteger(options.to_line)) { if (options.to_line <= 0) { throw new Error( `Invalid Option: to_line must be a positive integer greater than 0, got ${JSON.stringify(opts.to_line)}`, ); } } else { throw new Error( `Invalid Option: to_line must be an integer, got ${JSON.stringify(opts.to_line)}`, ); } } return options; }; const isRecordEmpty = function (record) { return record.every( (field) => field == null || (field.toString && field.toString().trim() === ""), ); }; const cr = 13; // `\r`, carriage return, 0x0D in hexadécimal, 13 in decimal const nl = 10; // `\n`, newline, 0x0A in hexadecimal, 10 in decimal const boms = { // Note, the following are equals: // Buffer.from("\ufeff") // Buffer.from([239, 187, 191]) // Buffer.from('EFBBBF', 'hex') utf8: Buffer.from([239, 187, 191]), // Note, the following are equals: // Buffer.from "\ufeff", 'utf16le // Buffer.from([255, 254]) utf16le: Buffer.from([255, 254]), }; const transform = function (original_options = {}) { const info = { bytes: 0, comment_lines: 0, empty_lines: 0, invalid_field_length: 0, lines: 1, records: 0, }; const options = normalize_options(original_options); return { info: info, original_options: original_options, options: options, state: init_state(options), __needMoreData: function (i, bufLen, end) { if (end) return false; const { encoding, escape, quote } = this.options; const { quoting, needMoreDataSize, recordDelimiterMaxLength } = this.state; const numOfCharLeft = bufLen - i - 1; const requiredLength = Math.max( needMoreDataSize, // Skip if the remaining buffer smaller than record delimiter // If "record_delimiter" is yet to be discovered: // 1. It is equals to `[]` and "recordDelimiterMaxLength" equals `0` // 2. We set the length to windows line ending in the current encoding // Note, that encoding is known from user or bom discovery at that point // recordDelimiterMaxLength, recordDelimiterMaxLength === 0 ? Buffer.from("\r\n", encoding).length : recordDelimiterMaxLength, // Skip if remaining buffer can be an escaped quote quoting ? (escape === null ? 0 : escape.length) + quote.length : 0, // Skip if remaining buffer can be record delimiter following the closing quote quoting ? quote.length + recordDelimiterMaxLength : 0, ); return numOfCharLeft < requiredLength; }, // Central parser implementation parse: function (nextBuf, end, push, close) { const { bom, comment_no_infix, encoding, from_line, ltrim, max_record_size, raw, relax_quotes, rtrim, skip_empty_lines, to, to_line, } = this.options; let { comment, escape, quote, record_delimiter } = this.options; const { bomSkipped, previousBuf, rawBuffer, escapeIsQuote } = this.state; let buf; if (previousBuf === undefined) { if (nextBuf === undefined) { // Handle empty string close(); return; } else { buf = nextBuf; } } else if (previousBuf !== undefined && nextBuf === undefined) { buf = previousBuf; } else { buf = Buffer.concat([previousBuf, nextBuf]); } // Handle UTF BOM if (bomSkipped === false) { if (bom === false) { this.state.bomSkipped = true; } else if (buf.length < 3) { // No enough data if (end === false) { // Wait for more data this.state.previousBuf = buf; return; } } else { for (const encoding in boms) { if (boms[encoding].compare(buf, 0, boms[encoding].length) === 0) { // Skip BOM const bomLength = boms[encoding].length; this.state.bufBytesStart += bomLength; buf = buf.slice(bomLength); // Renormalize original options with the new encoding this.options = normalize_options({ ...this.original_options, encoding: encoding, }); // Options will re-evaluate the Buffer with the new encoding ({ comment, escape, quote } = this.options); break; } } this.state.bomSkipped = true; } } const bufLen = buf.length; let pos; for (pos = 0; pos < bufLen; pos++) { // Ensure we get enough space to look ahead // There should be a way to move this out of the loop if (this.__needMoreData(pos, bufLen, end)) { break; } if (this.state.wasRowDelimiter === true) { this.info.lines++; this.state.wasRowDelimiter = false; } if (to_line !== -1 && this.info.lines > to_line) { this.state.stop = true; close(); return; } // Auto discovery of record_delimiter, unix, mac and windows supported if (this.state.quoting === false && record_delimiter.length === 0) { const record_delimiterCount = this.__autoDiscoverRecordDelimiter( buf, pos, ); if (record_delimiterCount) { record_delimiter = this.options.record_delimiter; } } const chr = buf[pos]; if (raw === true) { rawBuffer.append(chr); } if ( (chr === cr || chr === nl) && this.state.wasRowDelimiter === false ) { this.state.wasRowDelimiter = true; } // Previous char was a valid escape char // treat the current char as a regular char if (this.state.escaping === true) { this.state.escaping = false; } else { // Escape is only active inside quoted fields // We are quoting, the char is an escape chr and there is a chr to escape // if(escape !== null && this.state.quoting === true && chr === escape && pos + 1 < bufLen){ if ( escape !== null && this.state.quoting === true && this.__isEscape(buf, pos, chr) && pos + escape.length < bufLen ) { if (escapeIsQuote) { if (this.__isQuote(buf, pos + escape.length)) { this.state.escaping = true; pos += escape.length - 1; continue; } } else { this.state.escaping = true; pos += escape.length - 1; continue; } } // Not currently escaping and chr is a quote // TODO: need to compare bytes instead of single char if (this.state.commenting === false && this.__isQuote(buf, pos)) { if (this.state.quoting === true) { const nextChr = buf[pos + quote.length]; const isNextChrTrimable = rtrim && this.__isCharTrimable(buf, pos + quote.length); const isNextChrComment = comment !== null && this.__compareBytes(comment, buf, pos + quote.length, nextChr); const isNextChrDelimiter = this.__isDelimiter( buf, pos + quote.length, nextChr, ); const isNextChrRecordDelimiter = record_delimiter.length === 0 ? this.__autoDiscoverRecordDelimiter(buf, pos + quote.length) : this.__isRecordDelimiter(nextChr, buf, pos + quote.length); // Escape a quote // Treat next char as a regular character if ( escape !== null && this.__isEscape(buf, pos, chr) && this.__isQuote(buf, pos + escape.length) ) { pos += escape.length - 1; } else if ( !nextChr || isNextChrDelimiter || isNextChrRecordDelimiter || isNextChrComment || isNextChrTrimable ) { this.state.quoting = false; this.state.wasQuoting = true; pos += quote.length - 1; continue; } else if (relax_quotes === false) { const err = this.__error( new CsvError( "CSV_INVALID_CLOSING_QUOTE", [ "Invalid Closing Quote:", `got "${String.fromCharCode(nextChr)}"`, `at line ${this.info.lines}`, "instead of delimiter, record delimiter, trimable character", "(if activated) or comment", ], this.options, this.__infoField(), ), ); if (err !== undefined) return err; } else { this.state.quoting = false; this.state.wasQuoting = true; this.state.field.prepend(quote); pos += quote.length - 1; } } else { if (this.state.field.length !== 0) { // In relax_quotes mode, treat opening quote preceded by chrs as regular if (relax_quotes === false) { const info = this.__infoField(); const bom = Object.keys(boms) .map((b) => boms[b].equals(this.state.field.toString()) ? b : false, ) .filter(Boolean)[0]; const err = this.__error( new CsvError( "INVALID_OPENING_QUOTE", [ "Invalid Opening Quote:", `a quote is found on field ${JSON.stringify(info.column)} at line ${info.lines}, value is ${JSON.stringify(this.state.field.toString(encoding))}`, bom ? `(${bom} bom)` : undefined, ], this.options, info, { field: this.state.field, }, ), ); if (err !== undefined) return err; } } else { this.state.quoting = true; pos += quote.length - 1; continue; } } } if (this.state.quoting === false) { const recordDelimiterLength = this.__isRecordDelimiter( chr, buf, pos, ); if (recordDelimiterLength !== 0) { // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && this.state.wasQuoting === false && this.state.record.length === 0 && this.state.field.length === 0; if (skipCommentLine) { this.info.comment_lines++; // Skip full comment line } else { // Activate records emition if above from_line if ( this.state.enabled === false && this.info.lines + (this.state.wasRowDelimiter === true ? 1 : 0) >= from_line ) { this.state.enabled = true; this.__resetField(); this.__resetRecord(); pos += recordDelimiterLength - 1; continue; } // Skip if line is empty and skip_empty_lines activated if ( skip_empty_lines === true && this.state.wasQuoting === false && this.state.record.length === 0 && this.state.field.length === 0 ) { this.info.empty_lines++; pos += recordDelimiterLength - 1; continue; } this.info.bytes = this.state.bufBytesStart + pos; const errField = this.__onField(); if (errField !== undefined) return errField; this.info.bytes = this.state.bufBytesStart + pos + recordDelimiterLength; const errRecord = this.__onRecord(push); if (errRecord !== undefined) return errRecord; if (to !== -1 && this.info.records >= to) { this.state.stop = true; close(); return; } } this.state.commenting = false; pos += recordDelimiterLength - 1; continue; } if (this.state.commenting) { continue; } if ( comment !== null && (comment_no_infix === false || (this.state.record.length === 0 && this.state.field.length === 0)) ) { const commentCount = this.__compareBytes(comment, buf, pos, chr); if (commentCount !== 0) { this.state.commenting = true; continue; } } const delimiterLength = this.__isDelimiter(buf, pos, chr); if (delimiterLength !== 0) { this.info.bytes = this.state.bufBytesStart + pos; const errField = this.__onField(); if (errField !== undefined) return errField; pos += delimiterLength - 1; continue; } } } if (this.state.commenting === false) { if ( max_record_size !== 0 && this.state.record_length + this.state.field.length > max_record_size ) { return this.__error( new CsvError( "CSV_MAX_RECORD_SIZE", [ "Max Record Size:", "record exceed the maximum number of tolerated bytes", `of ${max_record_size}`, `at line ${this.info.lines}`, ], this.options, this.__infoField(), ), ); } } const lappend = ltrim === false || this.state.quoting === true || this.state.field.length !== 0 || !this.__isCharTrimable(buf, pos); // rtrim in non quoting is handle in __onField const rappend = rtrim === false || this.state.wasQuoting === false; if (lappend === true && rappend === true) { this.state.field.append(chr); } else if (rtrim === true && !this.__isCharTrimable(buf, pos)) { return this.__error( new CsvError( "CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE", [ "Invalid Closing Quote:", "found non trimable byte after quote", `at line ${this.info.lines}`, ], this.options, this.__infoField(), ), ); } else { if (lappend === false) { pos += this.__isCharTrimable(buf, pos) - 1; } continue; } } if (end === true) { // Ensure we are not ending in a quoting state if (this.state.quoting === true) { const err = this.__error( new CsvError( "CSV_QUOTE_NOT_CLOSED", [ "Quote Not Closed:", `the parsing is finished with an opening quote at line ${this.info.lines}`, ], this.options, this.__infoField(), ), ); if (err !== undefined) return err; } else { // Skip last line if it has no characters if ( this.state.wasQuoting === true || this.state.record.length !== 0 || this.state.field.length !== 0 ) { this.info.bytes = this.state.bufBytesStart + pos; const errField = this.__onField(); if (errField !== undefined) return errField; const errRecord = this.__onRecord(push); if (errRecord !== undefined) return errRecord; } else if (this.state.wasRowDelimiter === true) { this.info.empty_lines++; } else if (this.state.commenting === true) { this.info.comment_lines++; } } } else { this.state.bufBytesStart += pos; this.state.previousBuf = buf.slice(pos); } if (this.state.wasRowDelimiter === true) { this.info.lines++; this.state.wasRowDelimiter = false; } }, __onRecord: function (push) { const { columns, group_columns_by_name, encoding, info, from, relax_column_count, relax_column_count_less, relax_column_count_more, raw, skip_records_with_empty_values, } = this.options; const { enabled, record } = this.state; if (enabled === false) { return this.__resetRecord(); } // Convert the first line into column names const recordLength = record.length; if (columns === true) { if (skip_records_with_empty_values === true && isRecordEmpty(record)) { this.__resetRecord(); return; } return this.__firstLineToColumns(record); } if (columns === false && this.info.records === 0) { this.state.expectedRecordLength = recordLength; } if (recordLength !== this.state.expectedRecordLength) { const err = columns === false ? new CsvError( "CSV_RECORD_INCONSISTENT_FIELDS_LENGTH", [ "Invalid Record Length:", `expect ${this.state.expectedRecordLength},`, `got ${recordLength} on line ${this.info.lines}`, ], this.options, this.__infoField(), { record: record, }, ) : new CsvError( "CSV_RECORD_INCONSISTENT_COLUMNS", [ "Invalid Record Length:", `columns length is ${columns.length},`, // rename columns `got ${recordLength} on line ${this.info.lines}`, ], this.options, this.__infoField(), { record: record, }, ); if ( relax_column_count === true || (relax_column_count_less === true && recordLength < this.state.expectedRecordLength) || (relax_column_count_more === true && recordLength > this.state.expectedRecordLength) ) { this.info.invalid_field_length++; this.state.error = err; // Error is undefined with skip_records_with_error } else { const finalErr = this.__error(err); if (finalErr) return finalErr; } } if (skip_records_with_empty_values === true && isRecordEmpty(record)) { this.__resetRecord(); return; } if (this.state.recordHasError === true) { this.__resetRecord(); this.state.recordHasError = false; return; } this.info.records++; if (from === 1 || this.info.records >= from) { const { objname } = this.options; // With columns, records are object if (columns !== false) { const obj = {}; // Transform record array to an object for (let i = 0, l = record.length; i < l; i++) { if (columns[i] === undefined || columns[i].disabled) continue; // Turn duplicate columns into an array if ( group_columns_by_name === true && obj[columns[i].name] !== undefined ) { if (Array.isArray(obj[columns[i].name])) { obj[columns[i].name] = obj[columns[i].name].concat(record[i]); } else { obj[columns[i].name] = [obj[columns[i].name], record[i]]; } } else { obj[columns[i].name] = record[i]; } } // Without objname (default) if (raw === true || info === true) { const extRecord = Object.assign( { record: obj }, raw === true ? { raw: this.state.rawBuffer.toString(encoding) } : {}, info === true ? { info: this.__infoRecord() } : {}, ); const err = this.__push( objname === undefined ? extRecord : [obj[objname], extRecord], push, ); if (err) { return err; } } else { const err = this.__push( objname === undefined ? obj : [obj[objname], obj], push, ); if (err) { return err; } } // Without columns, records are array } else { if (raw === true || info === true) { const extRecord = Object.assign( { record: record }, raw === true ? { raw: this.state.rawBuffer.toString(encoding) } : {}, info === true ? { info: this.__infoRecord() } : {}, ); const err = this.__push( objname === undefined ? extRecord : [record[objname], extRecord], push, ); if (err) { return err; } } else { const err = this.__push( objname === undefined ? record : [record[objname], record], push, ); if (err) { return err; } } } } this.__resetRecord(); }, __firstLineToColumns: function (record) { const { firstLineToHeaders } = this.state; try { const headers = firstLineToHeaders === undefined ? record : firstLineToHeaders.call(null, record); if (!Array.isArray(headers)) { return this.__error( new CsvError( "CSV_INVALID_COLUMN_MAPPING", [ "Invalid Column Mapping:", "expect an array from column function,", `got ${JSON.stringify(headers)}`, ], this.options, this.__infoField(), { headers: headers, }, ), ); } const normalizedHeaders = normalize_columns_array(headers); this.state.expectedRecordLength = normalizedHeaders.length; this.options.columns = normalizedHeaders; this.__resetRecord(); return; } catch (err) { return err; } }, __resetRecord: function () { if (this.options.raw === true) { this.state.rawBuffer.reset(); } this.state.error = undefined; this.state.record = []; this.state.record_length = 0; }, __onField: function () { const { cast, encoding, rtrim, max_record_size } = this.options; const { enabled, wasQuoting } = this.state; // Short circuit for the from_line options if (enabled === false) { return this.__resetField(); } let field = this.state.field.toString(encoding); if (rtrim === true && wasQuoting === false) { field = field.trimRight(); } if (cast === true) { const [err, f] = this.__cast(field); if (err !== undefined) return err; field = f; } this.state.record.push(field); // Increment record length if record size must not exceed a limit if (max_record_size !== 0 && typeof field === "string") { this.state.record_length += field.length; } this.__resetField(); }, __resetField: function () { this.state.field.reset(); this.state.wasQuoting = false; }, __push: function (record, push) { const { on_record } = this.options; if (on_record !== undefined) { const info = this.__infoRecord(); try { record = on_record.call(null, record, info); } catch (err) { return err; } if (record === undefined || record === null) { return; } } push(record); }, // Return a tuple with the error and the casted value __cast: function (field) { const { columns, relax_column_count } = this.options; const isColumns = Array.isArray(columns); // Dont loose time calling cast // because the final record is an object // and this field can't be associated to a key present in columns if ( isColumns === true && relax_column_count && this.options.columns.length <= this.state.record.length ) { return [undefined, undefined]; } if (this.state.castField !== null) { try { const info = this.__infoField(); return [undefined, this.state.castField.call(null, field, info)]; } catch (err) { return [err]; } } if (this.__isFloat(field)) { return [undefined, parseFloat(field)]; } else if (this.options.cast_date !== false) { const info = this.__infoField(); return [undefined, this.options.cast_date.call(null, field, info)]; } return [undefined, field]; }, // Helper to test if a character is a space or a line delimiter __isCharTrimable: function (buf, pos) { const isTrim = (buf, pos) => { const { timchars } = this.state; loop1: for (let i = 0; i < timchars.length; i++) { const timchar = timchars[i]; for (let j = 0; j < timchar.length; j++) { if (timchar[j] !== buf[pos + j]) continue loop1; } return timchar.length; } return 0; }; return isTrim(buf, pos); }, // Keep it in case we implement the `cast_int` option // __isInt(value){ // // return Number.isInteger(parseInt(value)) // // return !isNaN( parseInt( obj ) ); // return /^(\-|\+)?[1-9][0-9]*$/.test(value) // } __isFloat: function (value) { return value - parseFloat(value) + 1 >= 0; // Borrowed from jquery }, __compareBytes: function (sourceBuf, targetBuf, targetPos, firstByte) { if (sourceBuf[0] !== firstByte) return 0; const sourceLength = sourceBuf.length; for (let i = 1; i < sourceLength; i++) { if (sourceBuf[i] !== targetBuf[targetPos + i]) return 0; } return sourceLength; }, __isDelimiter: function (buf, pos, chr) { const { delimiter, ignore_last_delimiters } = this.options; if ( ignore_last_delimiters === true && this.state.record.length === this.options.columns.length - 1 ) { return 0; } else if ( ignore_last_delimiters !== false && typeof ignore_last_delimiters === "number" && this.state.record.length === ignore_last_delimiters - 1 ) { return 0; } loop1: for (let i = 0; i < delimiter.length; i++) { const del = delimiter[i]; if (del[0] === chr) { for (let j = 1; j < del.length; j++) { if (del[j] !== buf[pos + j]) continue loop1; } return del.length; } } return 0; }, __isRecordDelimiter: function (chr, buf, pos) { const { record_delimiter } = this.options; const recordDelimiterLength = record_delimiter.length; loop1: for (let i = 0; i < recordDelimiterLength; i++) { const rd = record_delimiter[i]; const rdLength = rd.length; if (rd[0] !== chr) { continue; } for (let j = 1; j < rdLength; j++) { if (rd[j] !== buf[pos + j]) { continue loop1; } } return rd.length; } return 0; }, __isEscape: function (buf, pos, chr) { const { escape } = this.options; if (escape === null) return false; const l = escape.length; if (escape[0] === chr) { for (let i = 0; i < l; i++) { if (escape[i] !== buf[pos + i]) { return false; } } return true; } return false; }, __isQuote: function (buf, pos) { const { quote } = this.options; if (quote === null) return false; const l = quote.length; for (let i = 0; i < l; i++) { if (quote[i] !== buf[pos + i]) { return false; } } return true; }, __autoDiscoverRecordDelimiter: function (buf, pos) { const { encoding } = this.options; // Note, we don't need to cache this information in state, // It is only called on the first line until we find out a suitable // record delimiter. const rds = [ // Important, the windows line ending must be before mac os 9 Buffer.from("\r\n", encoding), Buffer.from("\n", encoding), Buffer.from("\r", encoding), ]; loop: for (let i = 0; i < rds.length; i++) { const l = rds[i].length; for (let j = 0; j < l; j++) { if (rds[i][j] !== buf[pos + j]) { continue loop; } } this.options.record_delimiter.push(rds[i]); this.state.recordDelimiterMaxLength = rds[i].length; return rds[i].length; } return 0; }, __error: function (msg) { const { encoding, raw, skip_records_with_error } = this.options; const err = typeof msg === "string" ? new Error(msg) : msg; if (skip_records_with_error) { this.state.recordHasError = true; if (this.options.on_skip !== undefined) { this.options.on_skip( err, raw ? this.state.rawBuffer.toString(encoding) : undefined, ); } // this.emit('skip', err, raw ? this.state.rawBuffer.toString(encoding) : undefined); return undefined; } else { return err; } }, __infoDataSet: function () { return { ...this.info, columns: this.options.columns, }; }, __infoRecord: function () { const { columns, raw, encoding } = this.options; return { ...this.__infoDataSet(), error: this.state.error, header: columns === true, index: this.state.record.length, raw: raw ? this.state.rawBuffer.toString(encoding) : undefined, }; }, __infoField: function () { const { columns } = this.options; const isColumns = Array.isArray(columns); return { ...this.__infoRecord(), column: isColumns === true ? columns.length > this.state.record.length ? columns[this.state.record.length].name : null : this.state.record.length, quoting: this.state.wasQuoting, }; }, }; }; /* CSV Parse Please look at the [project documentation](https://csv.js.org/parse/) for additional information. */ class Parser extends stream.Transform { constructor(opts = {}) { super({ ...{ readableObjectMode: true }, ...opts, encoding: null }); this.api = transform({ on_skip: (err, chunk) => { this.emit("skip", err, chunk); }, ...opts, }); // Backward compatibility this.state = this.api.state; this.options = this.api.options; this.info = this.api.info; } // Implementation of `Transform._transform` _transform(buf, _, callback) { if (this.state.stop === true) { return; } const err = this.api.parse( buf, false, (record) => { this.push(record); }, () => { this.push(null); this.end(); // Fix #333 and break #410 // ko: api.stream.iterator.coffee // ko with v21.4.0, ok with node v20.5.1: api.stream.finished # aborted (with generate()) // ko: api.stream.finished # aborted (with Readable) // this.destroy() // Fix #410 and partially break #333 // ok: api.stream.iterator.coffee // ok: api.stream.finished # aborted (with generate()) // broken: api.stream.finished # aborted (with Readable) this.on("end", this.destroy); }, ); if (err !== undefined) { this.state.stop = true; } callback(err); } // Implementation of `Transform._flush` _flush(callback) { if (this.state.stop === true) { return; } const err = this.api.parse( undefined, true, (record) => { this.push(record); }, () => { this.push(null); this.on("end", this.destroy); }, ); callback(err); } } const parse = function () { let data, options, callback; for (const i in arguments) { const argument = arguments[i]; const type = typeof argument; if ( data === undefined && (typeof argument === "string" || Buffer.isBuffer(argument)) ) { data = argument; } else if (options === undefined && is_object(argument)) { options = argument; } else if (callback === undefined && type === "function") { callback = argument; } else { throw new CsvError( "CSV_INVALID_ARGUMENT", ["Invalid argument:", `got ${JSON.stringify(argument)} at index ${i}`], options || {}, ); } } const parser = new Parser(options); if (callback) { const records = options === undefined || options.objname === undefined ? [] : {}; parser.on("readable", function () { let record; while ((record = this.read()) !== null) { if (options === undefined || options.objname === undefined) { records.push(record); } else { records[record[0]] = record[1]; } } }); parser.on("error", function (err) { callback(err, undefined, parser.api.__infoDataSet()); }); parser.on("end", function () { callback(undefined, records, parser.api.__infoDataSet()); }); } if (data !== undefined) { const writer = function () { parser.write(data); parser.end(); }; // Support Deno, Rollup doesnt provide a shim for setImmediate if (typeof setImmediate === "function") { setImmediate(writer); } else { setTimeout(writer, 0); } } return parser; }; exports.CsvError = CsvError; exports.Parser = Parser; exports.parse = parse;