1 | import { Processor, ProcessLineResult } from "./Processor";
|
2 | import P from "bluebird";
|
3 | import { prepareData } from "./dataClean";
|
4 | import getEol from "./getEol";
|
5 | import { stringToLines } from "./fileline";
|
6 | import { bufFromString, filterArray,trimLeft } from "./util";
|
7 | import { RowSplit } from "./rowSplit";
|
8 | import lineToJson from "./lineToJson";
|
9 | import { ParseRuntime } from "./ParseRuntime";
|
10 | import CSVError from "./CSVError";
|
11 |
|
12 |
|
13 |
|
/**
 * Processor implementation that parses CSV chunks directly, keeping any
 * not-yet-complete tail of the input in `runtime.csvLineBuffer` between calls.
 */
export class ProcessorLocal extends Processor {
  /** Row splitter bound to this processor's converter. */
  private rowSplit: RowSplit = new RowSplit(this.converter);
  /** Set once the "eol" event has been emitted. */
  private eolEmitted = false;
  /** Cached answer of `needEmitEol`; `undefined` until first queried. */
  private _needEmitEol?: boolean = undefined;
  /** Set once the "header" event has been emitted. */
  private headEmitted = false;
  /** Cached answer of `needEmitHead`; `undefined` until first queried. */
  private _needEmitHead?: boolean = undefined;

  /**
   * Whether the converter had at least one "eol" listener when this was
   * first read. The listener count is looked up once and then cached.
   */
  private get needEmitEol(): boolean {
    let cached = this._needEmitEol;
    if (cached === undefined) {
      cached = this.converter.listeners("eol").length > 0;
      this._needEmitEol = cached;
    }
    return cached;
  }

  /**
   * Whether the converter had at least one "header" listener when this was
   * first read. The listener count is looked up once and then cached.
   */
  private get needEmitHead(): boolean {
    let cached = this._needEmitHead;
    if (cached === undefined) {
      cached = this.converter.listeners("header").length > 0;
      this._needEmitHead = cached;
    }
    return cached;
  }

  /**
   * Processes whatever is still held in `runtime.csvLineBuffer` as the final
   * chunk.
   *
   * @returns the results of that last pass, or an empty list when nothing was
   *   buffered. Rejects with `CSVError.unclosed_quote` when data is still
   *   buffered after the final pass.
   */
  flush(): P<ProcessLineResult[]> {
    const pending = this.runtime.csvLineBuffer;
    if (!pending || !(pending.length > 0)) {
      return P.resolve([]);
    }
    // Take ownership of the buffer so the final pass starts from a clean slate.
    this.runtime.csvLineBuffer = undefined;
    return this.process(pending, true).then((res) => {
      const leftover = this.runtime.csvLineBuffer;
      if (leftover && leftover.length > 0) {
        return P.reject(CSVError.unclosed_quote(this.runtime.parsedLineNumber, leftover.toString()));
      }
      return P.resolve(res);
    });
  }

  /** Nothing to release for this processor; resolves immediately. */
  destroy(): P<void> {
    return P.resolve();
  }

  /**
   * Converts one chunk to text, passes it through `runtime.preRawDataHook`
   * when one is set, and parses the result.
   *
   * @param chunk raw input bytes
   * @param finalChunk when true the chunk is decoded with `toString()` instead
   *   of `prepareData`, and its trailing partial line is parsed rather than
   *   buffered
   * @returns the parsed results; an empty list when the (hooked) text is empty
   */
  process(chunk: Buffer, finalChunk = false): P<ProcessLineResult[]> {
    const csvString = finalChunk
      ? chunk.toString()
      : prepareData(chunk, this.converter.parseRuntime);
    return P.resolve()
      .then(() => {
        if (!this.runtime.preRawDataHook) {
          return csvString;
        }
        return this.runtime.preRawDataHook(csvString);
      })
      .then((csv) => {
        if (!csv || !(csv.length > 0)) {
          return P.resolve([]);
        }
        return this.processCSV(csv, finalChunk);
      });
  }

  /**
   * Splits CSV text into lines, optionally runs the per-line hook, and hands
   * the lines to header or body processing.
   *
   * @param csv text to parse
   * @param finalChunk whether this is the last piece of input
   */
  private processCSV(csv: string, finalChunk: boolean): P<ProcessLineResult[]> {
    const { params, runtime } = this;
    if (!runtime.eol) {
      getEol(csv, runtime);
    }
    if (this.needEmitEol && !this.eolEmitted && runtime.eol) {
      this.converter.emit("eol", runtime.eol);
      this.eolEmitted = true;
    }

    // Leading whitespace is only trimmed before parsing has started.
    const text = params.ignoreEmpty && !runtime.started ? trimLeft(csv) : csv;
    const split = stringToLines(text, runtime);
    if (finalChunk) {
      // No more input will arrive: the trailing partial counts as a line.
      split.lines.push(split.partial);
      split.partial = "";
    } else {
      this.prependLeftBuf(bufFromString(split.partial));
    }

    if (split.lines.length === 0) {
      return P.resolve([]);
    }

    const hooked: P<string[]> = runtime.preFileLineHook
      ? this.runPreLineHook(split.lines)
      : P.resolve(split.lines);
    return hooked.then((lines) =>
      runtime.started || this.runtime.headers
        ? this.processCSVBody(lines)
        : this.processDataWithHead(lines)
    );
  }

  /**
   * Establishes `runtime.headers` (from `params.headers`, from the first
   * closed row of `lines`, or empty when `noheader` is set without explicit
   * headers), applies column filtering, emits "header" once if anyone
   * listens, then parses the remaining lines as body.
   *
   * @param lines input lines; the lines consumed for the header row are
   *   removed from the front of this array
   * @returns parsed body rows, or an empty list when no complete header row
   *   was found yet
   */
  private processDataWithHead(lines: string[]): ProcessLineResult[] {
    if (this.params.noheader) {
      this.runtime.headers = this.params.headers || [];
    } else {
      let carry = "";
      let headerCells: string[] = [];
      while (lines.length) {
        const candidate = carry + lines.shift();
        const parsed = this.rowSplit.parse(candidate);
        if (!parsed.closed) {
          // Row is not closed yet: join it with the following line.
          carry = candidate + getEol(candidate, this.runtime);
          continue;
        }
        headerCells = parsed.cells;
        carry = "";
        break;
      }
      this.prependLeftBuf(bufFromString(carry));

      if (headerCells.length === 0) {
        return [];
      }
      this.runtime.headers = this.params.headers || headerCells;
    }

    if (this.runtime.needProcessIgnoreColumn || this.runtime.needProcessIncludeColumn) {
      this.filterHeader();
    }
    if (this.needEmitHead && !this.headEmitted) {
      this.converter.emit("header", this.runtime.headers);
      this.headEmitted = true;
    }
    return this.processCSVBody(lines);
  }

  /**
   * Rebuilds `runtime.selectedColumns` from `params.ignoreColumns` /
   * `params.includeColumns` and narrows `runtime.headers` to the selected
   * indexes.
   *
   * With `ignoreColumns` set, a header is dropped only when it matches
   * `ignoreColumns` and does not match `includeColumns`. With only
   * `includeColumns` set, a header is kept only when it matches. With
   * neither set, every header is kept.
   */
  private filterHeader() {
    const selected: number[] = [];
    this.runtime.selectedColumns = selected;
    const headers = this.runtime.headers;
    if (!headers) {
      return;
    }
    const { ignoreColumns, includeColumns } = this.params;
    headers.forEach((name, index) => {
      let keep: boolean;
      if (ignoreColumns) {
        keep = !ignoreColumns.test(name) || (!!includeColumns && includeColumns.test(name));
      } else if (includeColumns) {
        keep = includeColumns.test(name);
      } else {
        keep = true;
      }
      if (keep) {
        selected.push(index);
      }
    });
    this.runtime.headers = filterArray(this.runtime.headers, this.runtime.selectedColumns);
  }

  /**
   * Turns body lines into the configured output shape: the raw lines for
   * "line", cell arrays for "csv", and `lineToJson` output otherwise. Any
   * partial row reported by the row splitter is pushed back into the buffer.
   */
  private processCSVBody(lines: string[]): ProcessLineResult[] {
    const mode = this.params.output;
    if (mode === "line") {
      return lines;
    }
    const parsed = this.rowSplit.parseMultiLines(lines);
    this.prependLeftBuf(bufFromString(parsed.partial));
    if (mode === "csv") {
      return parsed.rowsCells;
    }
    return lineToJson(parsed.rowsCells, this.converter);
  }

  /**
   * Puts `buf` in front of whatever is already held in
   * `runtime.csvLineBuffer`, or stores it as the buffer when none is held.
   */
  private prependLeftBuf(buf: Buffer) {
    if (!buf) {
      return;
    }
    const existing = this.runtime.csvLineBuffer;
    this.runtime.csvLineBuffer = existing ? Buffer.concat([buf, existing]) : buf;
  }

  /**
   * Promise wrapper around `processLineHook`: resolves with the same `lines`
   * array (rewritten in place) or rejects with the error it reports.
   */
  private runPreLineHook(lines: string[]): P<string[]> {
    return new P<string[]>((resolve, reject) => {
      processLineHook(lines, this.runtime, 0, (err) => {
        if (!err) {
          resolve(lines);
          return;
        }
        reject(err);
      });
    });
  }
}
|
242 |
|
/**
 * Applies `runtime.preFileLineHook` to every entry of `lines` from `offset`
 * onwards, replacing each entry in place with the hook's result, then calls
 * `cb` exactly once.
 *
 * The hook may return a value or a thenable for any line. Synchronous results
 * are handled in a plain loop; when a thenable is returned, processing resumes
 * with the next line after it fulfils.
 *
 * @param lines lines to rewrite in place
 * @param runtime parse runtime providing `preFileLineHook` and
 *   `parsedLineNumber` (the hook receives `parsedLineNumber + index`)
 * @param offset index of the first line to process
 * @param cb completion callback; called with no argument on success, or with
 *   the error when the hook throws or its thenable rejects. It is also called
 *   immediately with no argument when no hook is set.
 */
function processLineHook(lines: string[], runtime: ParseRuntime, offset: number,
  cb: (err?: any) => void
) {
  if (!runtime.preFileLineHook) {
    cb();
    return;
  }
  try {
    while (offset < lines.length) {
      const index = offset;
      const res = runtime.preFileLineHook(lines[index], runtime.parsedLineNumber + index);
      if (res && typeof (res as PromiseLike<string>).then === "function") {
        (res as PromiseLike<string>).then(
          (value) => {
            lines[index] = value;
            processLineHook(lines, runtime, index + 1, cb);
          },
          // Without this handler a rejected hook would never reach `cb`
          // and the caller would wait forever.
          (err) => {
            cb(err);
          }
        );
        return;
      }
      lines[index] = res as string;
      offset++;
    }
  } catch (err) {
    // A hook that throws is reported through the callback like any other
    // failure instead of escaping (and being lost on the async path).
    cb(err);
    return;
  }
  cb();
}
\ | No newline at end of file |