UNPKG

csvtojson/v2/dataClean.js

Version:

2.35 kBJavaScriptView Raw

1"use strict";
2var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
4};
5Object.defineProperty(exports, "__esModule", { value: true });
6var strip_bom_1 = __importDefault(require("strip-bom"));
/**
 * Turn an incoming data chunk into a string for the parser.
 *
 * Steps, in order:
 * 1. Prepend whatever `runtime.csvLineBuffer` holds from the previous chunk.
 * 2. Clear `runtime.csvLineBuffer`; `cleanUtf8Split` stores there any trailing
 *    bytes it holds back from this chunk.
 * 3. Decode the remaining bytes as UTF-8.
 * 4. Strip the BOM, but only while `runtime.started` is strictly `false`.
 *
 * @param {Buffer} chunk - Raw bytes of the current chunk.
 * @param {Object} runtime - Parser runtime state; `csvLineBuffer` is read and
 *   rewritten, `started` is read.
 * @returns {string} The decoded text of the chunk.
 */
function prepareData(chunk, runtime) {
    var pending = concatLeftChunk(chunk, runtime);
    // Reset only after the leftover has been merged and before the split
    // check, which may assign a new leftover.
    runtime.csvLineBuffer = undefined;
    var text = cleanUtf8Split(pending, runtime).toString("utf8");
    return runtime.started === false ? strip_bom_1.default(text) : text;
}
24exports.prepareData = prepareData;
/**
 * Prepend the bytes left over from the previous chunk to the current chunk.
 *
 * @param {Buffer} chunk - Raw bytes of the current chunk.
 * @param {Object} runtime - Parser runtime state; `csvLineBuffer` is read.
 * @returns {Buffer} `chunk` itself when there is no non-empty leftover,
 *   otherwise a new buffer holding the leftover followed by `chunk`.
 */
function concatLeftChunk(chunk, runtime) {
    var leftover = runtime.csvLineBuffer;
    if (!leftover || !(leftover.length > 0)) {
        return chunk;
    }
    return Buffer.concat([leftover, chunk]);
}
/**
 * Number of bytes in the UTF-8 sequence introduced by `byte`.
 *
 * @param {number} byte - Candidate lead byte (0-255).
 * @returns {number} 1-4 for a valid lead byte, 0 when `byte` is a continuation
 *   byte (10xxxxxx) or otherwise cannot start a sequence.
 */
function utf8SequenceLength(byte) {
    if ((byte & 0x80) === 0x00) {
        return 1; // 0xxxxxxx
    }
    if ((byte & 0xE0) === 0xC0) {
        return 2; // 110xxxxx
    }
    if ((byte & 0xF0) === 0xE0) {
        return 3; // 1110xxxx
    }
    if ((byte & 0xF8) === 0xF0) {
        return 4; // 11110xxx
    }
    return 0;
}
/**
 * Check whether the chunk ends in the middle of a multi-byte UTF-8 character.
 * If it does, the incomplete trailing bytes are removed from the returned
 * buffer and stored in `runtime.csvLineBuffer`.
 *
 * Only an incomplete trailing sequence is held back. A chunk that ends with a
 * complete character, with plain ASCII, or with bytes that cannot form the
 * start of a valid sequence is returned unchanged and `runtime.csvLineBuffer`
 * is not touched.
 *
 * @param {Buffer} chunk - Bytes to inspect.
 * @param {Object} runtime - Parser runtime state; `csvLineBuffer` is assigned
 *   when bytes are held back.
 * @returns {Buffer} `chunk` itself, or a slice of it without the incomplete
 *   trailing bytes.
 */
function cleanUtf8Split(chunk, runtime) {
    var end = chunk.length;
    // An empty chunk or an ASCII final byte cannot be a split character.
    if (end === 0 || (chunk[end - 1] & 0x80) === 0) {
        return chunk;
    }
    // Walk back over continuation bytes (10xxxxxx) to reach the lead byte.
    // A valid sequence has at most three of them, so never look further back
    // than four bytes and never step before the start of the buffer.
    var lowest = Math.max(0, end - 4);
    var lead = end - 1;
    while (lead > lowest && (chunk[lead] & 0xC0) === 0x80) {
        lead--;
    }
    var expected = utf8SequenceLength(chunk[lead]);
    var available = end - lead;
    if (expected === 0 || available >= expected) {
        // Either no usable lead byte was found (malformed input, left for the
        // decoder to handle) or the trailing character is already complete.
        return chunk;
    }
    runtime.csvLineBuffer = chunk.slice(lead);
    return chunk.slice(0, lead);
}
72//# sourceMappingURL=dataClean.js.map
\No newline at end of file

1	`"use strict";`
2	`var __importDefault = (this && this.__importDefault) \|\| function (mod) {`
3	`return (mod && mod.__esModule) ? mod : { "default": mod };`
4	`};`
5	`Object.defineProperty(exports, "__esModule", { value: true });`
6	`var strip_bom_1 = __importDefault(require("strip-bom"));`
7	`/**`
8	`* For each data chunk coming to parser:`
9	`* 1. append the data to the buffer that is left from last chunk`
10	`* 2. check whether a UTF-8 character has been split; if so, strip its bytes and add them to the leftover buffer.`
11	`* 3. stripBom`
12	`*/`
13	`function prepareData(chunk, runtime) {`
14	`var workChunk = concatLeftChunk(chunk, runtime);`
15	`runtime.csvLineBuffer = undefined;`
16	`var cleanCSVString = cleanUtf8Split(workChunk, runtime).toString("utf8");`
17	`if (runtime.started === false) {`
18	`return strip_bom_1.default(cleanCSVString);`
19	`}`
20	`else {`
21	`return cleanCSVString;`
22	`}`
23	`}`
24	`exports.prepareData = prepareData;`
25	`/**`
26	`* append data to the buffer that is left from the last chunk`
27	`*/`
28	`function concatLeftChunk(chunk, runtime) {`
29	`if (runtime.csvLineBuffer && runtime.csvLineBuffer.length > 0) {`
30	`return Buffer.concat([runtime.csvLineBuffer, chunk]);`
31	`}`
32	`else {`
33	`return chunk;`
34	`}`
35	`}`
36	`/**`
37	`* check whether a UTF-8 character has been split; if so, strip its bytes and add them to the leftover buffer.`
38	`*/`
39	`function cleanUtf8Split(chunk, runtime) {`
40	`var idx = chunk.length - 1;`
41	`/**`
42	`* From Keyang:`
43	`* The code below is to check if a single utf8 char (which could be multiple bytes) being split.`
44	`* If the char being split, the buffer from two chunk needs to be concat`
45	`* check how utf8 being encoded to understand the code below.`
46	`* If anyone has any better way to do this, please let me know.`
47	`*/`
48	`if ((chunk[idx] & 1 << 7) != 0) {`
49	`while ((chunk[idx] & 3 << 6) === 128) {`
50	`idx--;`
51	`}`
52	`idx--;`
53	`}`
54	`if (idx != chunk.length - 1) {`
55	`runtime.csvLineBuffer = chunk.slice(idx + 1);`
56	`return chunk.slice(0, idx + 1);`
57	`// var _cb=cb;`
58	`// var self=this;`
59	`// cb=function(){`
60	`// if (self._csvLineBuffer){`
61	`// self._csvLineBuffer=Buffer.concat([bufFromString(self._csvLineBuffer,"utf8"),left]);`
62	`// }else{`
63	`// self._csvLineBuffer=left;`
64	`// }`
65	`// _cb();`
66	`// }`
67	`}`
68	`else {`
69	`return chunk;`
70	`}`
71	`}`
72	`//# sourceMappingURL=dataClean.js.map`
\	No newline at end of file