1 | ;
|
2 | var __importDefault = (this && this.__importDefault) || function (mod) {
|
3 | return (mod && mod.__esModule) ? mod : { "default": mod };
|
4 | };
|
5 | Object.defineProperty(exports, "__esModule", { value: true });
|
6 | var strip_bom_1 = __importDefault(require("strip-bom"));
|
/**
 * Convert a freshly received chunk into a decoded CSV string.
 *
 * Steps, in order:
 * 1. Prepend any bytes carried over from the previous chunk.
 * 2. Clear the carry-over, then let cleanUtf8Split trim trailing bytes that
 *    have to wait for the next chunk (it stores them back on
 *    `runtime.csvLineBuffer`).
 * 3. Decode the remaining bytes as UTF-8 and, only while `runtime.started`
 *    is strictly `false`, strip a leading byte-order mark.
 *
 * @param {Buffer} chunk - bytes just received.
 * @param {object} runtime - parser state; `csvLineBuffer` is read and
 *   rewritten, `started` is read.
 * @returns {string} the decoded text for this chunk.
 */
function prepareData(chunk, runtime) {
    var merged = concatLeftChunk(chunk, runtime);
    // The carry-over has been consumed above; cleanUtf8Split may set it again.
    runtime.csvLineBuffer = undefined;
    var decoded = cleanUtf8Split(merged, runtime).toString("utf8");
    return runtime.started === false ? strip_bom_1.default(decoded) : decoded;
}
|
24 | exports.prepareData = prepareData;
|
/**
 * Prepend the bytes left over from the previous chunk to the new chunk.
 *
 * @param {Buffer} chunk - newly received bytes.
 * @param {object} runtime - parser state; only `csvLineBuffer` is read.
 * @returns {Buffer} a new buffer holding leftover + chunk, or `chunk` itself
 *   when there is no non-empty leftover.
 */
function concatLeftChunk(chunk, runtime) {
    var pending = runtime.csvLineBuffer;
    var hasPending = Boolean(pending) && pending.length > 0;
    return hasPending ? Buffer.concat([pending, chunk]) : chunk;
}
|
/**
 * Make sure a chunk ends on a UTF-8 character boundary.
 *
 * If the chunk stops part-way through a multi-byte character, the bytes of
 * that unfinished character are cut off and stored in `runtime.csvLineBuffer`
 * so they can be joined with the next chunk, and the rest is returned.
 * A chunk that is empty or already ends on a complete character is returned
 * untouched and `runtime.csvLineBuffer` is not written.
 *
 * Trailing bytes that do not look like the start of a UTF-8 character (for
 * example stray continuation bytes with no lead byte within reach) are not
 * held back; they stay in the returned chunk.
 *
 * @param {Buffer} chunk - bytes to inspect; never modified.
 * @param {object} runtime - parser state; `csvLineBuffer` receives the
 *   held-back bytes when a split character is found.
 * @returns {Buffer} `chunk` itself, or a slice of it without the unfinished
 *   trailing character.
 */
function cleanUtf8Split(chunk, runtime) {
    var length = chunk.length;
    // A UTF-8 character is at most 4 bytes: one lead byte followed by up to
    // three continuation bytes (bit pattern 10xxxxxx). Never scan further
    // back than that, and never below index 0.
    var scanLimit = Math.max(0, length - 4);
    var leadIdx = length - 1;
    while (leadIdx > scanLimit && (chunk[leadIdx] & 0xc0) === 0x80) {
        leadIdx--;
    }
    if (leadIdx < 0) {
        // Empty chunk: nothing to inspect.
        return chunk;
    }
    // The lead byte announces how many bytes the character occupies.
    var lead = chunk[leadIdx];
    var expected;
    if ((lead & 0xe0) === 0xc0) {
        expected = 2; // 110xxxxx
    }
    else if ((lead & 0xf0) === 0xe0) {
        expected = 3; // 1110xxxx
    }
    else if ((lead & 0xf8) === 0xf0) {
        expected = 4; // 11110xxx
    }
    else {
        // ASCII, or not a valid lead byte: there is no split character.
        return chunk;
    }
    var present = length - leadIdx;
    if (present >= expected) {
        // The last character is complete.
        return chunk;
    }
    // Hold the unfinished character back for the next chunk.
    runtime.csvLineBuffer = chunk.slice(leadIdx);
    return chunk.slice(0, leadIdx);
}
|
72 | //# sourceMappingURL=dataClean.js.map |
\ | No newline at end of file |