UNPKG

7.04 kBJavaScriptView Raw
1const { Transform } = require('stream')
2const bufferFrom = require('buffer-from')
3const bufferAlloc = require('buffer-alloc')
4
// Byte values of carriage return and line feed, used when scanning raw buffers
const cr = bufferFrom('\r')[0]
const nl = bufferFrom('\n')[0]
// Option values used when the caller does not override them
const defaults = {
  // character that escapes a quote inside a cell
  escape: '"',
  // null: take the header names from the first parsed line
  headers: null,
  // identity mapping: keep each header as parsed
  mapHeaders: ({ header }) => {
    return header
  },
  // identity mapping: keep each cell value as parsed
  mapValues: ({ value }) => {
    return value
  },
  // line terminator
  newline: '\n',
  // character that wraps quoted cells
  quote: '"',
  // false: cells are decoded to utf-8 strings, true: cells stay buffers
  raw: false,
  // cell delimiter
  separator: ',',
  // true (or a marker string) drops lines starting with the comment marker
  skipComments: false,
  // number of leading lines to drop before parsing starts
  skipLines: null,
  // upper bound on the number of bytes scanned for a single row
  maxRowBytes: Number.MAX_SAFE_INTEGER,
  // true: a row whose cell count differs from the header count is an error
  strict: false
}
21
/**
 * Transform stream that turns CSV bytes into one plain object per row.
 *
 * The readable side runs in object mode. Input chunks are scanned byte by
 * byte; each complete line is split into cells and pushed as an object keyed
 * by header name (or by column index when `headers: false`).
 *
 * Events emitted besides the usual stream events:
 *  - `headers` (string[]) once the header line has been parsed
 *  - `error` (RangeError) for a row/header length mismatch in strict mode
 */
class CsvParser extends Transform {
  /**
   * @param {Object|Array} [opts] parser options merged over `defaults`;
   *   an array is shorthand for `{ headers: array }`
   */
  constructor (opts = {}) {
    super({ objectMode: true, highWaterMark: 16 })

    if (Array.isArray(opts)) opts = { headers: opts }

    const options = Object.assign({}, defaults, opts)

    // remembered before `newline` is replaced by its byte value below
    options.customNewline = options.newline !== defaults.newline

    // the scanner compares single bytes, so keep only the first byte of each
    for (const key of ['newline', 'quote', 'separator']) {
      if (typeof options[key] !== 'undefined') {
        ([options[key]] = bufferFrom(options[key]))
      }
    }

    // if escape is not defined on the passed options, use the end value of quote
    options.escape = (opts || {}).escape ? bufferFrom(options.escape)[0] : options.quote

    // first byte of the comment marker, resolved once instead of per line;
    // null means comment skipping is disabled
    this._commentByte = null
    if (options.skipComments) {
      const marker = typeof options.skipComments === 'string' ? options.skipComments : '#'
      this._commentByte = bufferFrom(marker)[0]
    }

    this.state = {
      empty: options.raw ? bufferAlloc(0) : '',
      escaped: false,
      first: true,
      lineNumber: 0,
      previousEnd: 0,
      rowLength: 0,
      quoted: false
    }

    // unconsumed tail of the previous chunk(s), if any
    this._prev = null

    if (options.headers === false) {
      // enforce, as the column length check will fail if headers:false
      options.strict = false
    }

    // headers supplied (or disabled) up front: the first line is already data
    if (options.headers || options.headers === false) {
      this.state.first = false
    }

    this.options = options
    this.headers = options.headers
  }

  /**
   * Extracts one cell from `buffer[start, end)`.
   *
   * Surrounding quotes are dropped and escape bytes that precede a quote are
   * removed by compacting the bytes in place, so `buffer` is modified.
   *
   * @param {Buffer} buffer bytes containing the cell
   * @param {number} start index of the first byte of the cell
   * @param {number} end index one past the last byte of the cell
   * @returns {string|Buffer} the cell value (see `parseValue`)
   */
  parseCell (buffer, start, end) {
    const { escape, quote } = this.options
    // remove quotes from quoted cells
    if (buffer[start] === quote && buffer[end - 1] === quote) {
      start++
      end--
    }

    // write cursor for the compacted cell content
    let y = start

    for (let i = start; i < end; i++) {
      // check for escape characters and skip them
      if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) {
        i++
      }

      if (y !== i) {
        buffer[y] = buffer[i]
      }
      y++
    }

    return this.parseValue(buffer, start, y)
  }

  /**
   * Parses the line stored in `buffer[start, end)` and either records it as
   * the header row, emits an `error`, or pushes it as a data row.
   *
   * @param {Buffer} buffer bytes containing the line
   * @param {number} start index of the first byte of the line
   * @param {number} end index one past the newline byte that ends the line
   */
  parseLine (buffer, start, end) {
    const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipLines } = this.options

    end-- // trim newline
    // also trim the CR of a CRLF pair, but never step back past the line start
    if (!customNewline && end > start && buffer[end - 1] === cr) {
      end--
    }

    const comma = separator
    const cells = []
    let isQuoted = false
    let offset = start

    if (this._commentByte !== null && buffer[start] === this._commentByte) {
      return
    }

    const mapValue = (value) => {
      // header cells are passed through untouched
      if (this.state.first) {
        return value
      }

      const index = cells.length
      const header = this.headers[index]

      return mapValues({ header, index, value })
    }

    for (let i = start; i < end; i++) {
      const isStartingQuote = !isQuoted && buffer[i] === quote
      // a quote only closes the cell when the separator follows it
      const isEndingQuote = isQuoted && buffer[i] === quote && buffer[i + 1] === comma
      const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote

      if (isStartingQuote || isEndingQuote) {
        isQuoted = !isQuoted
        continue
      } else if (isEscape) {
        i++
        continue
      }

      if (buffer[i] === comma && !isQuoted) {
        let value = this.parseCell(buffer, offset, i)
        value = mapValue(value)
        cells.push(value)
        offset = i + 1
      }
    }

    // last cell of the line (not followed by a separator)
    if (offset < end) {
      let value = this.parseCell(buffer, offset, end)
      value = mapValue(value)
      cells.push(value)
    }

    // a trailing separator means one more, empty, cell; only look at bytes
    // that belong to this line
    if (end > start && buffer[end - 1] === comma) {
      cells.push(mapValue(this.state.empty))
    }

    const skip = skipLines && skipLines > this.state.lineNumber
    this.state.lineNumber++

    if (this.state.first && !skip) {
      this.state.first = false
      this.headers = cells.map((header, index) => mapHeaders({ header, index }))

      this.emit('headers', this.headers)
      return
    }

    if (skip) return

    if (this.options.strict && cells.length !== this.headers.length) {
      const e = new RangeError('Row length does not match headers')
      this.emit('error', e)
    } else {
      this.writeRow(cells)
    }
  }

  /**
   * Converts `buffer[start, end)` to the configured output type.
   *
   * @param {Buffer} buffer source bytes
   * @param {number} start index of the first byte
   * @param {number} end index one past the last byte
   * @returns {string|Buffer} a slice of `buffer` when `options.raw` is set,
   *   otherwise the bytes decoded as utf-8
   */
  parseValue (buffer, start, end) {
    if (this.options.raw) {
      return buffer.slice(start, end)
    }

    return buffer.toString('utf-8', start, end)
  }

  /**
   * Builds the row object for `cells` and pushes it downstream.
   *
   * Columns whose header is `null` are dropped. Cells without a header
   * (more cells than headers) are stored under `_<index>`. With
   * `headers: false` every cell is keyed by its column index. The parser's
   * `headers` are never modified here, so one over-long row does not change
   * how later rows are keyed.
   *
   * @param {Array<string|Buffer>} cells parsed cell values of one row
   */
  writeRow (cells) {
    const headers = this.headers === false
      ? cells.map((value, index) => index)
      : this.headers

    const row = cells.reduce((o, cell, index) => {
      const header = headers[index]
      if (header === null) return o // skipped column
      if (header !== undefined) {
        o[header] = cell
      } else {
        o[`_${index}`] = cell
      }
      return o
    }, {})

    this.push(row)
  }

  /**
   * Parses whatever is left in the carry-over buffer as the final line.
   *
   * @param {Function} cb stream callback
   */
  _flush (cb) {
    if (this.state.escaped || !this._prev) return cb()
    // the last line has no newline byte; pass length + 1 because parseLine
    // always subtracts one to trim the newline
    this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1)
    cb()
  }

  /**
   * Scans a chunk for line ends outside of quotes and hands every complete
   * line to `parseLine`. Bytes after the last complete line are kept in
   * `_prev` and prepended to the next chunk.
   *
   * @param {Buffer|string} data incoming chunk
   * @param {string} enc encoding (unused)
   * @param {Function} cb stream callback; called with an Error when a row
   *   grows beyond `options.maxRowBytes`
   */
  _transform (data, enc, cb) {
    if (typeof data === 'string') {
      data = bufferFrom(data)
    }

    const { escape, quote } = this.options
    let start = 0
    let buffer = data

    if (this._prev) {
      // bytes of the carry-over were already scanned; resume after them
      start = this._prev.length
      buffer = Buffer.concat([this._prev, data])
      this._prev = null
    }

    const bufferLength = buffer.length

    for (let i = start; i < bufferLength; i++) {
      const chr = buffer[i]
      const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null

      this.state.rowLength++
      if (this.state.rowLength > this.options.maxRowBytes) {
        return cb(new Error('Row exceeds the maximum size'))
      }

      if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) {
        this.state.escaped = true
        continue
      } else if (chr === quote) {
        if (this.state.escaped) {
          this.state.escaped = false
          // non-escaped quote (quoting the cell)
        } else {
          this.state.quoted = !this.state.quoted
        }
        continue
      }

      if (!this.state.quoted) {
        // until the header line is done, detect LF vs. lone CR line endings
        if (this.state.first && !this.options.customNewline) {
          if (chr === nl) {
            this.options.newline = nl
          } else if (chr === cr) {
            if (nextChr !== nl) {
              this.options.newline = cr
            }
          }
        }

        if (chr === this.options.newline) {
          this.parseLine(buffer, this.state.previousEnd, i + 1)
          this.state.previousEnd = i + 1
          this.state.rowLength = 0
        }
      }
    }

    // everything consumed: nothing to carry over
    if (this.state.previousEnd === bufferLength) {
      this.state.previousEnd = 0
      return cb()
    }

    // the unfinished line lies entirely inside the new chunk: keep only that
    if (bufferLength - this.state.previousEnd < data.length) {
      this._prev = data
      this.state.previousEnd -= (bufferLength - data.length)
      return cb()
    }

    this._prev = buffer
    cb()
  }
}
275
276module.exports = (opts) => new CsvParser(opts)