// Source: UNPKG file view (21.8 kB, JavaScript).
// deno-lint-ignore-file no-unused-vars no-case-declarations
// Modernized/de-nodified version of creationix/jsonparse
// Copyright (c) 2012 Tim Caswell
// Licensed under the MIT (licenses/MIT.md) license.
// TODO: TypeScript conversion?
// TODO: Integrate with other modules for better performance
// Named constants with unique integer values.
// Every token and state code is stored twice: as a module-level binding for
// fast comparisons, and as a property of `C` so that a numeric code can be
// mapped back to its name (see Parser.toknam) when building error messages.
const C = {};
// Tokens
const LEFT_BRACE = C.LEFT_BRACE = 0x1;
const RIGHT_BRACE = C.RIGHT_BRACE = 0x2;
const LEFT_BRACKET = C.LEFT_BRACKET = 0x3;
const RIGHT_BRACKET = C.RIGHT_BRACKET = 0x4;
const COLON = C.COLON = 0x5;
const COMMA = C.COMMA = 0x6;
const TRUE = C.TRUE = 0x7;
const FALSE = C.FALSE = 0x8;
const NULL = C.NULL = 0x9;
const STRING = C.STRING = 0xa;
const NUMBER = C.NUMBER = 0xb;
// Tokenizer States
const START = C.START = 0x11;
const STOP = C.STOP = 0x12;
// Literal keywords: state N means "N characters of the keyword consumed".
const TRUE1 = C.TRUE1 = 0x21;
const TRUE2 = C.TRUE2 = 0x22;
const TRUE3 = C.TRUE3 = 0x23;
const FALSE1 = C.FALSE1 = 0x31;
const FALSE2 = C.FALSE2 = 0x32;
const FALSE3 = C.FALSE3 = 0x33;
const FALSE4 = C.FALSE4 = 0x34;
const NULL1 = C.NULL1 = 0x41;
const NULL2 = C.NULL2 = 0x42;
const NULL3 = C.NULL3 = 0x43;
// Numbers: NUMBER1 = after a leading "-", NUMBER3 = inside the number body.
const NUMBER1 = C.NUMBER1 = 0x51;
const NUMBER3 = C.NUMBER3 = 0x53;
// Strings: STRING1 = inside the quotes, STRING2 = after a backslash,
// STRING3..STRING6 = reading the four hex digits of a \uXXXX escape.
// STRING3..STRING6 must stay consecutive: the tokenizer advances through
// them by incrementing the state value.
const STRING1 = C.STRING1 = 0x61;
const STRING2 = C.STRING2 = 0x62;
const STRING3 = C.STRING3 = 0x63;
const STRING4 = C.STRING4 = 0x64;
const STRING5 = C.STRING5 = 0x65;
const STRING6 = C.STRING6 = 0x66;
// Parser States
const VALUE = C.VALUE = 0x71;
const KEY = C.KEY = 0x72;
// Parser Modes
const OBJECT = C.OBJECT = 0x81;
const ARRAY = C.ARRAY = 0x82;
// Character constants (byte values appended for the matching escape sequences)
const BACK_SLASH = "\\".charCodeAt(0);
const FORWARD_SLASH = "\/".charCodeAt(0);
const BACKSPACE = "\b".charCodeAt(0);
const FORM_FEED = "\f".charCodeAt(0);
const NEWLINE = "\n".charCodeAt(0);
const CARRIAGE_RETURN = "\r".charCodeAt(0);
const TAB = "\t".charCodeAt(0);
// Size in bytes of the scratch buffer that accumulates string contents
// before they are decoded to text.
const STRING_BUFFER_SIZE = 64 * 1024;
/**
 * Allocates a zero-filled byte buffer.
 *
 * @param {number} size - Length of the buffer in bytes.
 * @returns {Uint8Array} A new buffer of `size` bytes.
 */
function alloc(size) {
    return new Uint8Array(size);
}
/**
 * Streaming JSON parser.
 *
 * Input is fed through `write()` as strings or `Uint8Array` chunks of UTF-8
 * bytes. A byte-level tokenizer (`tState`) turns the input into tokens and
 * hands them to `onToken()`, which assembles objects and arrays (`state`,
 * `mode`, `stack`) and reports every completed value through `onValue()`.
 *
 * Hooks meant to be replaced or overridden:
 *  - `onValue(value)`      receives each completed value (nested ones included).
 *  - `onError(err)`        error policy; the default implementation throws.
 *  - `numberReviver(text)` converts the text of a number token.
 *
 * Note: a number token is only finalised when the first byte that is not
 * part of the number arrives.
 */
class Parser {
    constructor() {
        this.tState = START; // tokenizer state
        this.value = undefined; // container currently being filled
        this.string = undefined; // string data (also the text of a number in progress)
        this.stringBuffer = alloc(STRING_BUFFER_SIZE); // UTF-8 bytes of the string not yet decoded
        this.stringBufferOffset = 0; // number of valid bytes in stringBuffer
        this.unicode = undefined; // unicode escapes: hex digits collected so far
        this.highSurrogate = undefined; // high surrogate waiting for its low half
        this.key = undefined; // property name or array index for the next value
        this.mode = undefined; // OBJECT, ARRAY, or undefined at the top level
        this.stack = []; // saved { value, key, mode } of the enclosing containers
        this.state = VALUE; // parser state
        this.bytes_remaining = 0; // number of bytes remaining in multi byte utf8 char to read after split boundary
        this.bytes_in_sequence = 0; // bytes in multi byte utf8 char to read
        this.temp_buffs = { "2": alloc(2), "3": alloc(3), "4": alloc(4) }; // for rebuilding chars split before boundary is reached
        this.encoder = new TextEncoder();
        this.decoder = new TextDecoder();
        // Stream offset
        this.offset = -1;
    }

    /**
     * Slow code-to-name converter (only used when building error messages).
     *
     * @param {number} code - A token or state code registered in `C`.
     * @returns {string|*} The constant's name, a "0x.." hex string for an
     *   unknown non-zero code, or the falsy `code` itself.
     */
    static toknam(code) {
        const keys = Object.keys(C);
        for (let i = 0, l = keys.length; i < l; i++) {
            const key = keys[i];
            if (C[key] === code) {
                return key;
            }
        }
        return code && ("0x" + code.toString(16));
    }

    /** Encodes a string to UTF-8 bytes. */
    encode(string) { return this.encoder.encode(string); }

    /** Decodes UTF-8 bytes to a string. */
    decode(buffer) { return this.decoder.decode(buffer); }

    /** Error policy hook; the default rethrows the error to the caller of `write()`. */
    onError(err) { throw err; }

    /**
     * Reports an unexpected byte and stops the tokenizer.
     *
     * @param {Uint8Array} buffer - Chunk currently being tokenized.
     * @param {number} i - Index of the offending byte within `buffer`.
     */
    charError(buffer, i) {
        // Remember the state that rejected the byte before switching to STOP,
        // otherwise the message would always read "in state STOP".
        const failedState = this.tState;
        this.tState = STOP;
        this.onError(new Error("Unexpected " + JSON.stringify(String.fromCharCode(buffer[i])) + " at position " + i + " in state " + Parser.toknam(failedState)));
    }

    /**
     * Appends one byte to the pending string bytes, decoding and emptying
     * the scratch buffer first when it is full.
     *
     * @param {number} char - Byte value to append.
     */
    appendStringChar(char) {
        if (this.stringBufferOffset >= STRING_BUFFER_SIZE) {
            this.string += this.decode(this.stringBuffer);
            this.stringBufferOffset = 0;
        }
        this.stringBuffer[this.stringBufferOffset++] = char;
    }

    /**
     * Appends `buf.subarray(start, end)` to the pending string bytes,
     * decoding and emptying the scratch buffer first when the bytes would
     * not fit.
     *
     * @param {Uint8Array} buf - Source bytes.
     * @param {number} [start] - First index to copy (defaults to the whole buffer).
     * @param {number} [end] - Index after the last byte; negative values count from the end.
     */
    appendStringBuf(buf, start, end) {
        let size = buf.length;
        if (typeof start === 'number') {
            if (typeof end === 'number') {
                if (end < 0) {
                    // adding a negative end decreases the size
                    size = buf.length - start + end;
                }
                else {
                    size = end - start;
                }
            }
            else {
                size = buf.length - start;
            }
        }
        if (size < 0) {
            size = 0;
        }
        if (this.stringBufferOffset + size > STRING_BUFFER_SIZE) {
            this.string += this.decode(this.stringBuffer.subarray(0, this.stringBufferOffset));
            this.stringBufferOffset = 0;
        }
        this.stringBuffer.set(buf.subarray(start, end), this.stringBufferOffset);
        this.stringBufferOffset += size;
    }

    /**
     * Feeds the next chunk of input to the parser.
     *
     * @param {string|Uint8Array} buffer - Text, or UTF-8 bytes. Chunk
     *   boundaries may fall anywhere, including inside a multi-byte character.
     * @returns {*} `undefined` normally; when parsing is interrupted by
     *   `numberReviver`, the value it returned.
     */
    write(buffer) {
        if (typeof buffer === "string") {
            buffer = this.encode(buffer);
        }
        let n;
        for (let i = 0, l = buffer.length; i < l; i++) {
            if (this.tState === START) {
                n = buffer[i];
                this.offset++;
                if (n === 0x7b) {
                    this.onToken(LEFT_BRACE, "{"); // {
                }
                else if (n === 0x7d) {
                    this.onToken(RIGHT_BRACE, "}"); // }
                }
                else if (n === 0x5b) {
                    this.onToken(LEFT_BRACKET, "["); // [
                }
                else if (n === 0x5d) {
                    this.onToken(RIGHT_BRACKET, "]"); // ]
                }
                else if (n === 0x3a) {
                    this.onToken(COLON, ":"); // :
                }
                else if (n === 0x2c) {
                    this.onToken(COMMA, ","); // ,
                }
                else if (n === 0x74) {
                    this.tState = TRUE1; // t
                }
                else if (n === 0x66) {
                    this.tState = FALSE1; // f
                }
                else if (n === 0x6e) {
                    this.tState = NULL1; // n
                }
                else if (n === 0x22) { // "
                    this.string = "";
                    this.stringBufferOffset = 0;
                    this.tState = STRING1;
                }
                else if (n === 0x2d) {
                    this.string = "-";
                    this.tState = NUMBER1; // -
                }
                else if (n >= 0x30 && n <= 0x39) { // 0-9 only (0x3a-0x3f are punctuation, not digits)
                    this.string = String.fromCharCode(n);
                    this.tState = NUMBER3;
                }
                else if (n === 0x20 || n === 0x09 || n === 0x0a || n === 0x0d) {
                    // whitespace
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === STRING1) { // After open quote
                n = buffer[i]; // get current byte from buffer
                if (this.bytes_remaining > 0) {
                    // A multi-byte character was split by the previous chunk boundary:
                    // keep filling its temp buffer. The current chunk may itself be
                    // too short to complete it, so copy only what is available.
                    const sequenceLength = this.bytes_in_sequence;
                    const pending = this.temp_buffs[sequenceLength];
                    const filled = sequenceLength - this.bytes_remaining;
                    const take = Math.min(this.bytes_remaining, l - i);
                    for (let j = 0; j < take; j++) {
                        pending[filled + j] = buffer[i + j];
                    }
                    this.bytes_remaining -= take;
                    if (this.bytes_remaining === 0) {
                        this.appendStringBuf(pending);
                        this.bytes_in_sequence = 0;
                    }
                    i += take - 1;
                }
                else if (n >= 128) { // no bytes carried over: handle multi byte (>=128) chars one at a time
                    if (n <= 193 || n > 244) {
                        return this.onError(new Error("Invalid UTF-8 character at position " + i + " in state " + Parser.toknam(this.tState)));
                    }
                    // The lead byte determines the length of the sequence.
                    if (n <= 223) {
                        this.bytes_in_sequence = 2;
                    }
                    else if (n <= 239) {
                        this.bytes_in_sequence = 3;
                    }
                    else {
                        this.bytes_in_sequence = 4;
                    }
                    if ((this.bytes_in_sequence + i) > buffer.length) { // if bytes needed to complete char fall outside buffer length, we have a boundary split
                        for (let k = 0; k <= (buffer.length - 1 - i); k++) {
                            this.temp_buffs[this.bytes_in_sequence][k] = buffer[i + k]; // fill temp buffer of correct size with bytes available in this chunk
                        }
                        this.bytes_remaining = (i + this.bytes_in_sequence) - buffer.length;
                        i = buffer.length - 1;
                    }
                    else {
                        this.appendStringBuf(buffer, i, i + this.bytes_in_sequence);
                        i = i + this.bytes_in_sequence - 1;
                    }
                }
                else if (n === 0x22) { // closing quote
                    this.tState = START;
                    this.string += this.decode(this.stringBuffer.subarray(0, this.stringBufferOffset));
                    this.stringBufferOffset = 0;
                    this.onToken(STRING, this.string);
                    this.offset += this.encode(this.string).length + 1;
                    this.string = undefined;
                }
                else if (n === 0x5c) {
                    this.tState = STRING2;
                }
                else if (n >= 0x20) {
                    this.appendStringChar(n);
                }
                else {
                    // control characters are not allowed inside strings
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === STRING2) { // After backslash
                n = buffer[i];
                if (n === 0x22) {
                    this.appendStringChar(n);
                    this.tState = STRING1;
                }
                else if (n === 0x5c) {
                    this.appendStringChar(BACK_SLASH);
                    this.tState = STRING1;
                }
                else if (n === 0x2f) {
                    this.appendStringChar(FORWARD_SLASH);
                    this.tState = STRING1;
                }
                else if (n === 0x62) {
                    this.appendStringChar(BACKSPACE);
                    this.tState = STRING1;
                }
                else if (n === 0x66) {
                    this.appendStringChar(FORM_FEED);
                    this.tState = STRING1;
                }
                else if (n === 0x6e) {
                    this.appendStringChar(NEWLINE);
                    this.tState = STRING1;
                }
                else if (n === 0x72) {
                    this.appendStringChar(CARRIAGE_RETURN);
                    this.tState = STRING1;
                }
                else if (n === 0x74) {
                    this.appendStringChar(TAB);
                    this.tState = STRING1;
                }
                else if (n === 0x75) {
                    this.unicode = "";
                    this.tState = STRING3;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === STRING3 || this.tState === STRING4 || this.tState === STRING5 || this.tState === STRING6) { // unicode hex codes
                n = buffer[i];
                // 0-9 A-F a-f
                if ((n >= 0x30 && n <= 0x39) || (n >= 0x41 && n <= 0x46) || (n >= 0x61 && n <= 0x66)) {
                    this.unicode += String.fromCharCode(n);
                    // STRING3..STRING6 are consecutive, so incrementing moves to the next digit.
                    if (this.tState++ === STRING6) {
                        const intVal = parseInt(this.unicode, 16);
                        this.unicode = undefined;
                        if (this.highSurrogate !== undefined && intVal >= 0xDC00 && intVal < (0xDFFF + 1)) { //<56320,57343> - lowSurrogate
                            this.appendStringBuf(this.encode(String.fromCharCode(this.highSurrogate, intVal)));
                            this.highSurrogate = undefined;
                        }
                        else if (this.highSurrogate === undefined && intVal >= 0xD800 && intVal < (0xDBFF + 1)) { //<55296,56319> - highSurrogate
                            this.highSurrogate = intVal;
                        }
                        else {
                            if (this.highSurrogate !== undefined) {
                                this.appendStringBuf(this.encode(String.fromCharCode(this.highSurrogate)));
                                this.highSurrogate = undefined;
                            }
                            this.appendStringBuf(this.encode(String.fromCharCode(intVal)));
                        }
                        this.tState = STRING1;
                    }
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === NUMBER1 || this.tState === NUMBER3) {
                n = buffer[i];
                switch (n) {
                    case 0x30: // 0
                    case 0x31: // 1
                    case 0x32: // 2
                    case 0x33: // 3
                    case 0x34: // 4
                    case 0x35: // 5
                    case 0x36: // 6
                    case 0x37: // 7
                    case 0x38: // 8
                    case 0x39: // 9
                    case 0x2e: // .
                    case 0x65: // e
                    case 0x45: // E
                    case 0x2b: // +
                    case 0x2d: // -
                        this.string += String.fromCharCode(n);
                        this.tState = NUMBER3;
                        break;
                    default: {
                        // First byte after the number: convert the collected text,
                        // then re-process this byte in the START state.
                        this.tState = START;
                        const error = this.numberReviver(this.string);
                        if (error) {
                            return error;
                        }
                        this.offset += this.string.length - 1;
                        this.string = undefined;
                        i--;
                        break;
                    }
                }
            }
            else if (this.tState === TRUE1) { // r
                if (buffer[i] === 0x72) {
                    this.tState = TRUE2;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === TRUE2) { // u
                if (buffer[i] === 0x75) {
                    this.tState = TRUE3;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === TRUE3) { // e
                if (buffer[i] === 0x65) {
                    this.tState = START;
                    this.onToken(TRUE, true);
                    this.offset += 3;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === FALSE1) { // a
                if (buffer[i] === 0x61) {
                    this.tState = FALSE2;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === FALSE2) { // l
                if (buffer[i] === 0x6c) {
                    this.tState = FALSE3;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === FALSE3) { // s
                if (buffer[i] === 0x73) {
                    this.tState = FALSE4;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === FALSE4) { // e
                if (buffer[i] === 0x65) {
                    this.tState = START;
                    this.onToken(FALSE, false);
                    this.offset += 4;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === NULL1) { // u
                if (buffer[i] === 0x75) {
                    this.tState = NULL2;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === NULL2) { // l
                if (buffer[i] === 0x6c) {
                    this.tState = NULL3;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
            else if (this.tState === NULL3) { // l
                if (buffer[i] === 0x6c) {
                    this.tState = START;
                    this.onToken(NULL, null);
                    this.offset += 3;
                }
                else {
                    return this.charError(buffer, i);
                }
            }
        }
    }

    /**
     * Reports a token that is not valid in the current parser state and
     * stops the tokenizer.
     *
     * @param {number} token - Token code.
     * @param {*} value - Token value, included in the message when truthy.
     */
    parseError(token, value) {
        this.tState = STOP;
        this.onError(new Error("Unexpected " + Parser.toknam(token) + (value ? ("(" + JSON.stringify(value) + ")") : "") + " in state " + Parser.toknam(this.state)));
    }

    /** Saves the current container context before descending into a nested one. */
    push() {
        this.stack.push({ value: this.value, key: this.key, mode: this.mode });
    }

    /** Restores the enclosing container context and emits the container just closed. */
    pop() {
        const value = this.value;
        const parent = this.stack.pop();
        this.value = parent.value;
        this.key = parent.key;
        this.mode = parent.mode;
        this.emit(value);
        if (!this.mode) {
            // Back at the top level: ready for another root value.
            this.state = VALUE;
        }
    }

    /**
     * Reports a completed value; inside a container the parser then expects
     * a comma or the closing bracket.
     *
     * @param {*} value - The completed value.
     */
    emit(value) {
        if (this.mode) {
            this.state = COMMA;
        }
        this.onValue(value);
    }

    /**
     * Called with every completed value. While it runs, `this.key`,
     * `this.value` and `this.stack` describe where the value sits.
     *
     * @param {*} value - The completed value.
     */
    onValue(value) {
        // Override me
    }

    /**
     * Consumes one token and advances the value-building state machine.
     *
     * @param {number} token - Token code (LEFT_BRACE, STRING, NUMBER, ...).
     * @param {*} value - Token value (decoded string, number, true/false/null, or punctuation text).
     */
    onToken(token, value) {
        if (this.state === VALUE) {
            if (token === STRING || token === NUMBER || token === TRUE || token === FALSE || token === NULL) {
                if (this.value) {
                    this.value[this.key] = value;
                }
                this.emit(value);
            }
            else if (token === LEFT_BRACE) {
                this.push();
                if (this.value) {
                    this.value = this.value[this.key] = {};
                }
                else {
                    this.value = {};
                }
                this.key = undefined;
                this.state = KEY;
                this.mode = OBJECT;
            }
            else if (token === LEFT_BRACKET) {
                this.push();
                if (this.value) {
                    this.value = this.value[this.key] = [];
                }
                else {
                    this.value = [];
                }
                this.key = 0;
                this.mode = ARRAY;
                this.state = VALUE;
            }
            else if (token === RIGHT_BRACE) {
                if (this.mode === OBJECT) {
                    this.pop();
                }
                else {
                    return this.parseError(token, value);
                }
            }
            else if (token === RIGHT_BRACKET) {
                if (this.mode === ARRAY) {
                    this.pop();
                }
                else {
                    return this.parseError(token, value);
                }
            }
            else {
                return this.parseError(token, value);
            }
        }
        else if (this.state === KEY) {
            if (token === STRING) {
                this.key = value;
                this.state = COLON;
            }
            else if (token === RIGHT_BRACE) {
                this.pop();
            }
            else {
                return this.parseError(token, value);
            }
        }
        else if (this.state === COLON) {
            if (token === COLON) {
                this.state = VALUE;
            }
            else {
                return this.parseError(token, value);
            }
        }
        else if (this.state === COMMA) {
            if (token === COMMA) {
                if (this.mode === ARRAY) {
                    this.key++;
                    this.state = VALUE;
                }
                else if (this.mode === OBJECT) {
                    this.state = KEY;
                }
            }
            else if (token === RIGHT_BRACKET && this.mode === ARRAY || token === RIGHT_BRACE && this.mode === OBJECT) {
                this.pop();
            }
            else {
                return this.parseError(token, value);
            }
        }
        else {
            return this.parseError(token, value);
        }
    }

    /**
     * Converts the text of a number token and forwards it to `onToken`.
     * Override to implement your own number reviver.
     * Any value returned is treated as error and will interrupt parsing.
     *
     * @param {string} text - Raw text of the number token.
     * @returns {Error|undefined} The error reported for malformed text, otherwise nothing.
     */
    numberReviver(text) {
        const result = Number(text);
        if (Number.isNaN(result)) {
            // Malformed number such as "1.2.3" or a lone "-". The chunk and byte
            // index are not available here, so report the tracked stream offset.
            this.tState = STOP;
            const error = new Error("Invalid number " + JSON.stringify(text) + " at stream offset " + this.offset);
            this.onError(error);
            return error;
        }
        if (/^[0-9]+$/.test(text) && result.toString() !== text) {
            // Long string of digits which is an ID string and not valid and/or safe JavaScript integer Number
            this.onToken(STRING, text);
        }
        else {
            this.onToken(NUMBER, result);
        }
    }
}
570Parser.C = C;
571export { Parser as JSONParser };
572export default Parser;
//# sourceMappingURL=json-parser.js.map