1 | const { Transform } = require('stream')
|
2 | const bufferFrom = require('buffer-from')
|
3 | const bufferAlloc = require('buffer-alloc')
|
4 |
|
// Byte values of carriage return and line feed; the parser compares raw
// buffer bytes against these rather than against strings.
const cr = bufferFrom('\r')[0]
const nl = bufferFrom('\n')[0]

/**
 * Option values used for every key the caller does not supply.
 * The CsvParser constructor merges caller options over a copy of this object.
 */
const defaults = {
  // character that escapes a quote inside a quoted cell
  escape: '"',
  // null: take headers from the first parsed line; the constructor also
  // handles an array of names and `false`
  headers: null,
  // called with { header, index }; the returned value becomes the row key
  mapHeaders: (column) => column.header,
  // called with { header, index, value }; the returned value is stored in the row
  mapValues: (cell) => cell.value,
  newline: '\n',
  quote: '"',
  // true: cell values are returned as Buffer slices instead of utf-8 strings
  raw: false,
  separator: ',',
  // true (uses '#') or a string whose first byte marks a comment line
  skipComments: false,
  // number of leading lines to ignore before parsing starts
  skipLines: null,
  // upper bound on the bytes of a single row
  maxRowBytes: Number.MAX_SAFE_INTEGER,
  // true: a row whose cell count differs from the header count is an error
  strict: false
}
|
21 |
|
/**
 * Streaming CSV parser.
 *
 * A Transform that consumes CSV input as Buffers or strings and pushes one
 * plain object per data row, keyed by header. It emits 'headers' once the
 * header row has been read from the input, and 'error' (a RangeError) for a
 * row whose cell count differs from the header count while `strict` is set.
 */
class CsvParser extends Transform {
  /**
   * @param {Object|Array} [opts] Parser options, merged over `defaults`.
   *   An array is shorthand for `{ headers: opts }`.
   */
  constructor (opts = {}) {
    super({ objectMode: true, highWaterMark: 16 })

    if (Array.isArray(opts)) opts = { headers: opts }

    const options = Object.assign({}, defaults, opts)

    // Newline auto-detection in _transform only runs when the caller kept
    // the default newline.
    options.customNewline = options.newline !== defaults.newline

    // Parsing compares raw bytes, so each delimiter is reduced to its first byte.
    for (const key of ['newline', 'quote', 'separator']) {
      if (typeof options[key] !== 'undefined') {
        ([options[key]] = bufferFrom(options[key]))
      }
    }

    // Without a caller-supplied escape, reuse the (already converted) quote byte.
    options.escape = (opts || {}).escape ? bufferFrom(options.escape)[0] : options.quote

    this.state = {
      // value used for the empty cell after a trailing separator
      empty: options.raw ? bufferAlloc(0) : '',
      // true between an escape byte and the quote it escapes (see _transform)
      escaped: false,
      // true until the header line has been consumed
      first: true,
      // number of lines seen by parseLine (comment lines excluded)
      lineNumber: 0,
      // offset of the first byte not yet handed to parseLine
      previousEnd: 0,
      // bytes scanned in the current row, checked against maxRowBytes
      rowLength: 0,
      // true while the scan position is inside a quoted cell
      quoted: false
    }

    // Bytes carried over between _transform calls (an unfinished line).
    this._prev = null

    if (options.headers === false) {
      // Without headers there is no header count to compare rows against.
      options.strict = false
    }

    if (options.headers || options.headers === false) {
      // Headers are given (or disabled): the first line is already data.
      this.state.first = false
    }

    this.options = options
    this.headers = options.headers
  }

  /**
   * Extracts one cell from buffer[start, end): strips a surrounding quote
   * pair and collapses escape+quote sequences to a single quote.
   * Un-escaping compacts the bytes in place, so `buffer` is modified.
   *
   * @param {Buffer} buffer
   * @param {number} start inclusive start offset of the cell
   * @param {number} end exclusive end offset of the cell
   * @returns {string|Buffer} the cell value as produced by parseValue
   */
  parseCell (buffer, start, end) {
    const { escape, quote } = this.options

    if (buffer[start] === quote && buffer[end - 1] === quote) {
      start++
      end--
    }

    // y is the write position, i the read position; they diverge once an
    // escape byte has been dropped.
    let y = start

    for (let i = start; i < end; i++) {
      if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) {
        i++
      }

      if (y !== i) {
        buffer[y] = buffer[i]
      }
      y++
    }

    return this.parseValue(buffer, start, y)
  }

  /**
   * Splits one line into cells and either records it as the header row,
   * emits an 'error' (strict mode length mismatch), or writes it as a row.
   *
   * @param {Buffer} buffer
   * @param {number} start offset of the first byte of the line
   * @param {number} end offset one past the line terminator; _flush passes
   *   length + 1 for a final line that has no terminator
   */
  parseLine (buffer, start, end) {
    const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipComments, skipLines } = this.options

    end-- // drop the line terminator
    // Drop the CR of a CRLF pair. The `end > start` guard keeps an empty
    // line from consuming the byte that terminated the previous line.
    if (!customNewline && end > start && buffer[end - 1] === cr) {
      end--
    }

    const comma = separator
    const cells = []
    let isQuoted = false
    let offset = start

    if (skipComments) {
      const char = typeof skipComments === 'string' ? skipComments : '#'
      if (buffer[start] === bufferFrom(char)[0]) {
        return
      }
    }

    // Applies mapValues to data cells; header-row cells pass through untouched.
    const mapValue = (value) => {
      if (this.state.first) {
        return value
      }

      const index = cells.length
      const header = this.headers[index]

      return mapValues({ header, index, value })
    }

    for (let i = start; i < end; i++) {
      const isStartingQuote = !isQuoted && buffer[i] === quote
      const isEndingQuote = isQuoted && buffer[i] === quote && i + 1 <= end && buffer[i + 1] === comma
      const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote

      if (isStartingQuote || isEndingQuote) {
        isQuoted = !isQuoted
        continue
      } else if (isEscape) {
        // skip the escaped quote so it is not read as a closing quote
        i++
        continue
      }

      if (buffer[i] === comma && !isQuoted) {
        let value = this.parseCell(buffer, offset, i)
        value = mapValue(value)
        cells.push(value)
        offset = i + 1
      }
    }

    // Remaining bytes after the last separator form the final cell.
    if (offset < end) {
      let value = this.parseCell(buffer, offset, end)
      value = mapValue(value)
      cells.push(value)
    }

    // A trailing separator denotes one more, empty, cell. Only bytes of this
    // line are inspected: an empty line must not see the previous line's
    // trailing separator.
    if (end > start && buffer[end - 1] === comma) {
      cells.push(mapValue(this.state.empty))
    }

    const skip = skipLines && skipLines > this.state.lineNumber
    this.state.lineNumber++

    if (this.state.first && !skip) {
      this.state.first = false
      this.headers = cells.map((header, index) => mapHeaders({ header, index }))

      this.emit('headers', this.headers)
      return
    }

    if (!skip && this.options.strict && cells.length !== this.headers.length) {
      const e = new RangeError('Row length does not match headers')
      this.emit('error', e)
    } else {
      if (!skip) this.writeRow(cells)
    }
  }

  /**
   * Converts buffer[start, end) to the configured value type.
   *
   * @param {Buffer} buffer
   * @param {number} start inclusive start offset
   * @param {number} end exclusive end offset
   * @returns {string|Buffer} a Buffer slice when `raw` is set, else a utf-8 string
   */
  parseValue (buffer, start, end) {
    if (this.options.raw) {
      return buffer.slice(start, end)
    }

    return buffer.toString('utf-8', start, end)
  }

  /**
   * Builds a row object from the cells and pushes it downstream.
   * Cells whose header is `null` are left out of the row.
   *
   * @param {Array<string|Buffer>} cells
   */
  writeRow (cells) {
    // NOTE(review): a row longer than the header list replaces this.headers
    // with numeric indexes, and the replacement stays in effect for later
    // rows too. Confirm this is intended.
    if (this.headers === false || cells.length > this.headers.length) {
      this.headers = cells.map((value, index) => index)
    }

    const row = cells.reduce((o, cell, index) => {
      const header = this.headers[index]
      if (header !== null) {
        o[header] = cell
      }
      return o
    }, {})

    this.push(row)
  }

  /**
   * Parses the final line when the input did not end with a newline.
   *
   * @param {Function} cb completion callback
   */
  _flush (cb) {
    if (this.state.escaped || !this._prev) return cb()
    // parseLine drops one terminator byte, so pass length + 1 to keep the last byte.
    this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1)
    cb()
  }

  /**
   * Scans a chunk for unquoted newlines, hands each complete line to
   * parseLine and keeps the unfinished remainder for the next call.
   *
   * @param {Buffer|string} data incoming chunk
   * @param {string} enc unused
   * @param {Function} cb called with an Error when a row exceeds maxRowBytes,
   *   otherwise with no arguments
   */
  _transform (data, enc, cb) {
    if (typeof data === 'string') {
      data = bufferFrom(data)
    }

    const { escape, quote } = this.options
    let start = 0
    let buffer = data

    if (this._prev) {
      // Resume scanning after the bytes that were already scanned last time.
      start = this._prev.length
      buffer = Buffer.concat([this._prev, data])
      this._prev = null
    }

    const bufferLength = buffer.length

    for (let i = start; i < bufferLength; i++) {
      const chr = buffer[i]
      const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null

      this.state.rowLength++
      if (this.state.rowLength > this.options.maxRowBytes) {
        return cb(new Error('Row exceeds the maximum size'))
      }

      // NOTE(review): the `i !== start` exclusion presumably covers an
      // escape pair split across two chunks — confirm.
      if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) {
        this.state.escaped = true
        continue
      } else if (chr === quote) {
        if (this.state.escaped) {
          // this quote was escaped: it does not open or close a cell
          this.state.escaped = false
        } else {
          this.state.quoted = !this.state.quoted
        }
        continue
      }

      if (!this.state.quoted) {
        // While reading the header line with the default newline, detect
        // whether lines end in LF or in a lone CR.
        if (this.state.first && !this.options.customNewline) {
          if (chr === nl) {
            this.options.newline = nl
          } else if (chr === cr) {
            if (nextChr !== nl) {
              this.options.newline = cr
            }
          }
        }

        if (chr === this.options.newline) {
          this.parseLine(buffer, this.state.previousEnd, i + 1)
          this.state.previousEnd = i + 1
          this.state.rowLength = 0
        }
      }
    }

    // Everything was consumed: nothing to carry over.
    if (this.state.previousEnd === bufferLength) {
      this.state.previousEnd = 0
      return cb()
    }

    // The unfinished line lies entirely inside the newest chunk: keep only
    // that chunk and rebase previousEnd onto it.
    if (bufferLength - this.state.previousEnd < data.length) {
      this._prev = data
      this.state.previousEnd -= (bufferLength - data.length)
      return cb()
    }

    // Otherwise the unfinished line spans earlier data: keep the whole buffer.
    this._prev = buffer
    cb()
  }
}
|
275 |
|
276 | module.exports = (opts) => new CsvParser(opts)
|