UNPKG

17.1 kBJavaScriptView Raw
1"use strict";
2
3Object.defineProperty(exports, "__esModule", {
4 value: true
5});
6exports.isPunctuatorTokenKind = isPunctuatorTokenKind;
7exports.Lexer = void 0;
8
9var _syntaxError = require("../error/syntaxError.js");
10
11var _ast = require("./ast.js");
12
13var _tokenKind = require("./tokenKind.js");
14
15var _blockString = require("./blockString.js");
16
/**
 * Given a Source object, creates a Lexer for that source.
 *
 * A Lexer is a stateful token stream: every call to `advance()` moves the
 * lexer onto the next token that is not a comment. Once the current token is
 * of kind EOF, `lookahead()` (and therefore `advance()`) keeps returning that
 * same EOF token.
 */
var Lexer = /*#__PURE__*/function () {
  /**
   * Creates a lexer whose current and previous token are both a synthetic
   * start-of-file token.
   *
   * Instance fields:
   *  - source:    the Source object being lexed
   *  - lastToken: the previously focused non-ignored token
   *  - token:     the currently focused non-ignored token
   *  - line:      the (1-indexed) line containing the current token
   *  - lineStart: the character offset at which the current line begins
   */
  function Lexer(source) {
    var sofToken = new _ast.Token(_tokenKind.TokenKind.SOF, 0, 0, 0, 0, null);
    this.source = source;
    this.lastToken = sofToken;
    this.token = sofToken;
    this.line = 1;
    this.lineStart = 0;
  }

  /**
   * Advances the token stream to the next non-ignored token and returns it.
   * The token that was current before the call becomes `lastToken`.
   */
  Lexer.prototype.advance = function advance() {
    this.lastToken = this.token;
    this.token = this.lookahead();
    return this.token;
  };

  /**
   * Looks ahead and returns the next non-ignored token without changing
   * `token` or `lastToken`. Tokens that have to be read are cached on the
   * `next` property of their predecessor, so they are only lexed once.
   */
  Lexer.prototype.lookahead = function lookahead() {
    var cursor = this.token;

    // EOF is terminal: there is nothing after it to look at.
    if (cursor.kind === _tokenKind.TokenKind.EOF) {
      return cursor;
    }

    do {
      var following = cursor.next;

      if (following === null || following === undefined) {
        // Not lexed yet: read it and link it so later calls reuse it.
        following = readToken(this, cursor);
        cursor.next = following;
      }

      cursor = following;
    } while (cursor.kind === _tokenKind.TokenKind.COMMENT); // skip comments

    return cursor;
  };

  return Lexer;
}();
84/**
85 * @internal
86 */
87
88
89exports.Lexer = Lexer;
90
/**
 * Reports whether `kind` is one of the punctuator token kinds
 * (! $ & ( ) ... : = @ [ ] { | }).
 *
 * @param kind - a token kind value, compared strictly against TokenKind members
 * @returns true for a punctuator kind, false for anything else
 */
function isPunctuatorTokenKind(kind) {
  var TokenKind = _tokenKind.TokenKind;

  switch (kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;

    default:
      return false;
  }
}
94
/**
 * Renders a UTF-16 char code for use inside an error message.
 *
 * @param code - a char code as returned by String.prototype.charCodeAt
 * @returns TokenKind.EOF for NaN/undefined, a JSON string literal for codes
 *   below 0x7F, and a quoted "\uXXXX" escape (upper-case hex) otherwise
 */
function printCharCode(code) {
  // NaN/undefined represents access beyond the end of the file.
  if (isNaN(code)) {
    return _tokenKind.TokenKind.EOF;
  }

  // Trust JSON for ASCII.
  if (code < 0x007f) {
    return JSON.stringify(String.fromCharCode(code));
  }

  // Otherwise print the escaped form, left-padded to four hex digits.
  var hex = code.toString(16).toUpperCase();
  return '"\\u' + ('00' + hex).slice(-4) + '"';
}
/**
 * Gets the next token from the source, starting at the end of `prev`.
 *
 * Ignored characters (BOM, tab, space, comma) and line terminators are
 * skipped first; line terminators also bump `lexer.line` and reset
 * `lexer.lineStart`. Punctuators are lexed in place; comments, strings,
 * numbers and names are delegated to their reader functions.
 *
 * @param lexer - the Lexer; its `source`, `line` and `lineStart` are used
 * @param prev - the preceding token; lexing resumes at `prev.end`
 * @returns the next token, or an EOF token once the body is exhausted
 * @throws a syntax error for a character that cannot start any token
 */
function readToken(lexer, prev) {
  var source = lexer.source;
  var body = source.body;
  var bodyLength = body.length;
  var Token = _ast.Token;
  var Kind = _tokenKind.TokenKind;
  var pos = prev.end;

  while (pos < bodyLength) {
    var code = body.charCodeAt(pos);

    // Ignored: <BOM>, \t, <space> and ,
    if (code === 0xfeff || code === 9 || code === 32 || code === 44) {
      pos += 1;
      continue;
    }

    // Line terminators: \n, \r and \r\n (the pair counts as one line break).
    if (code === 10 || code === 13) {
      pos += (code === 13 && body.charCodeAt(pos + 1) === 10) ? 2 : 1;
      lexer.line += 1;
      lexer.lineStart = pos;
      continue;
    }

    // From here on `pos` is the start of a token (or of an error).
    var tokenLine = lexer.line;
    var tokenCol = 1 + pos - lexer.lineStart;

    switch (code) {
      case 33: // !
        return new Token(Kind.BANG, pos, pos + 1, tokenLine, tokenCol, prev);

      case 34: // "
        if (body.charCodeAt(pos + 1) === 34 && body.charCodeAt(pos + 2) === 34) {
          return readBlockString(source, pos, tokenLine, tokenCol, prev, lexer);
        }

        return readString(source, pos, tokenLine, tokenCol, prev);

      case 35: // #
        return readComment(source, pos, tokenLine, tokenCol, prev);

      case 36: // $
        return new Token(Kind.DOLLAR, pos, pos + 1, tokenLine, tokenCol, prev);

      case 38: // &
        return new Token(Kind.AMP, pos, pos + 1, tokenLine, tokenCol, prev);

      case 40: // (
        return new Token(Kind.PAREN_L, pos, pos + 1, tokenLine, tokenCol, prev);

      case 41: // )
        return new Token(Kind.PAREN_R, pos, pos + 1, tokenLine, tokenCol, prev);

      case 46: // .
        if (body.charCodeAt(pos + 1) === 46 && body.charCodeAt(pos + 2) === 46) {
          return new Token(Kind.SPREAD, pos, pos + 3, tokenLine, tokenCol, prev);
        }

        // A lone "." is not a token: fall out to the error below.
        break;

      case 58: // :
        return new Token(Kind.COLON, pos, pos + 1, tokenLine, tokenCol, prev);

      case 61: // =
        return new Token(Kind.EQUALS, pos, pos + 1, tokenLine, tokenCol, prev);

      case 64: // @
        return new Token(Kind.AT, pos, pos + 1, tokenLine, tokenCol, prev);

      case 91: // [
        return new Token(Kind.BRACKET_L, pos, pos + 1, tokenLine, tokenCol, prev);

      case 93: // ]
        return new Token(Kind.BRACKET_R, pos, pos + 1, tokenLine, tokenCol, prev);

      case 123: // {
        return new Token(Kind.BRACE_L, pos, pos + 1, tokenLine, tokenCol, prev);

      case 124: // |
        return new Token(Kind.PIPE, pos, pos + 1, tokenLine, tokenCol, prev);

      case 125: // }
        return new Token(Kind.BRACE_R, pos, pos + 1, tokenLine, tokenCol, prev);
    }

    // - or 0-9
    if (code === 45 || (code >= 48 && code <= 57)) {
      return readNumber(source, pos, code, tokenLine, tokenCol, prev);
    }

    // _ A-Z a-z
    if (isNameStart(code)) {
      return readName(source, pos, tokenLine, tokenCol, prev);
    }

    throw (0, _syntaxError.syntaxError)(source, pos, unexpectedCharacterMessage(code));
  }

  // Nothing but ignored characters remained: emit EOF at the end of the body.
  return new Token(Kind.EOF, bodyLength, bodyLength, lexer.line, 1 + pos - lexer.lineStart, prev);
}
/**
 * Builds the error message for a character that cannot start any token.
 *
 * @param code - char code of the offending character
 * @returns one of three messages: control characters other than tab, line
 *   feed and carriage return are reported as invalid; a single quote gets a
 *   hint about double quotes; everything else is reported as unexpected
 */
function unexpectedCharacterMessage(code) {
  var isTabOrLineBreak = code === 0x0009 || code === 0x000a || code === 0x000d;

  if (code < 0x0020 && !isTabOrLineBreak) {
    return 'Cannot contain the invalid character ' + printCharCode(code) + '.';
  }

  // '
  if (code === 39) {
    return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
  }

  return 'Cannot parse the unexpected character ' + printCharCode(code) + '.';
}
/**
 * Reads a comment token from the source file.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * @param source - object whose `body` string is being lexed
 * @param start - offset of the leading `#`
 * @param line - line number to record on the token
 * @param col - column number to record on the token
 * @param prev - the preceding token
 * @returns a COMMENT token whose value is the text after the `#`, up to (not
 *   including) the first control character other than tab, or end of body
 */
function readComment(source, start, line, col, prev) {
  var body = source.body;
  var position = start + 1;
  var code = body.charCodeAt(position);

  // SourceCharacter but not LineTerminator; NaN means end of body.
  while (!isNaN(code) && (code > 0x001f || code === 0x0009)) {
    position += 1;
    code = body.charCodeAt(position);
  }

  var text = body.slice(start + 1, position);
  return new _ast.Token(_tokenKind.TokenKind.COMMENT, start, position, line, col, prev, text);
}
/**
 * Reads a number token from the source file, either a float
 * or an int depending on whether a fraction or exponent appears.
 *
 * Int:   -?(0|[1-9][0-9]*)
 * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
 *
 * @param source - object whose `body` string is being lexed
 * @param start - offset of the first character of the number
 * @param firstCode - char code at `start` (a digit or `-`)
 * @param line - line number to record on the token
 * @param col - column number to record on the token
 * @param prev - the preceding token
 * @returns an INT or FLOAT token whose value is the raw source text
 * @throws a syntax error for a digit after a leading 0, a missing digit, or a
 *   number immediately followed by `.` or a name-start character
 */
function readNumber(source, start, firstCode, line, col, prev) {
  var text = source.body;
  var cursor = start;
  var ch = firstCode;
  var sawFractionOrExponent = false;

  // Optional sign: -
  if (ch === 45) {
    cursor += 1;
    ch = text.charCodeAt(cursor);
  }

  // Integer part: a lone 0, or a run of digits.
  if (ch !== 48) {
    cursor = readDigits(source, cursor, ch);
    ch = text.charCodeAt(cursor);
  } else {
    cursor += 1;
    ch = text.charCodeAt(cursor);

    if (ch >= 48 && ch <= 57) {
      throw (0, _syntaxError.syntaxError)(source, cursor, 'Invalid number, unexpected digit after 0: ' + printCharCode(ch) + '.');
    }
  }

  // Fraction part: . followed by at least one digit.
  if (ch === 46) {
    sawFractionOrExponent = true;
    cursor += 1;
    ch = text.charCodeAt(cursor);
    cursor = readDigits(source, cursor, ch);
    ch = text.charCodeAt(cursor);
  }

  // Exponent part: E or e, optional sign, at least one digit.
  if (ch === 69 || ch === 101) {
    sawFractionOrExponent = true;
    cursor += 1;
    ch = text.charCodeAt(cursor);

    // + -
    if (ch === 43 || ch === 45) {
      cursor += 1;
      ch = text.charCodeAt(cursor);
    }

    cursor = readDigits(source, cursor, ch);
    ch = text.charCodeAt(cursor);
  }

  // Numbers cannot be followed by . or NameStart
  if (ch === 46 || isNameStart(ch)) {
    throw (0, _syntaxError.syntaxError)(source, cursor, 'Invalid number, expected digit but got: ' + printCharCode(ch) + '.');
  }

  var kind = sawFractionOrExponent ? _tokenKind.TokenKind.FLOAT : _tokenKind.TokenKind.INT;
  return new _ast.Token(kind, start, cursor, line, col, prev, text.slice(start, cursor));
}
/**
 * Consumes a run of one or more decimal digits.
 *
 * @param source - object whose `body` string is being lexed
 * @param start - offset at which the run must begin
 * @param firstCode - char code at `start`
 * @returns the offset just past the last digit of the run
 * @throws a syntax error at `start` when `firstCode` is not a digit
 */
function readDigits(source, start, firstCode) {
  // A run needs at least one digit; NaN (end of body) fails this check too.
  if (!(firstCode >= 48 && firstCode <= 57)) {
    throw (0, _syntaxError.syntaxError)(source, start, 'Invalid number, expected digit but got: ' + printCharCode(firstCode) + '.');
  }

  var body = source.body;
  var position = start + 1;
  var code = body.charCodeAt(position);

  // 0 - 9
  while (code >= 48 && code <= 57) {
    position += 1;
    code = body.charCodeAt(position);
  }

  return position;
}
/**
 * Reads a string token from the source file.
 *
 * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
 *
 * @param source - object whose `body` string is being lexed
 * @param start - offset of the opening quote
 * @param line - line number to record on the token
 * @param col - column number to record on the token
 * @param prev - the preceding token
 * @returns a STRING token ending just past the closing quote, whose value is
 *   the string content with escape sequences decoded
 * @throws a syntax error for an invalid control character, an invalid escape
 *   sequence, or a string that is not closed before a line terminator or the
 *   end of the body
 */
function readString(source, start, line, col, prev) {
  var body = source.body;
  var position = start + 1;
  // Start of the pending run of literal characters not yet copied to `value`.
  var chunkStart = position;
  var code = 0;
  var value = '';

  while (position < body.length && !isNaN(code = body.charCodeAt(position)) && // not LineTerminator
  code !== 0x000a && code !== 0x000d) {
    // Closing Quote (")
    if (code === 34) {
      value += body.slice(chunkStart, position);
      return new _ast.Token(_tokenKind.TokenKind.STRING, start, position + 1, line, col, prev, value);
    }

    // SourceCharacter
    if (code < 0x0020 && code !== 0x0009) {
      throw (0, _syntaxError.syntaxError)(source, position, 'Invalid character within String: ' + printCharCode(code) + '.');
    }

    ++position;

    if (code === 92) {
      // \ : flush the literal run before the backslash, then decode the escape.
      value += body.slice(chunkStart, position - 1);
      code = body.charCodeAt(position);

      switch (code) {
        case 34:
          value += '"';
          break;

        case 47:
          value += '/';
          break;

        case 92:
          value += '\\';
          break;

        case 98:
          value += '\b';
          break;

        case 102:
          value += '\f';
          break;

        case 110:
          value += '\n';
          break;

        case 114:
          value += '\r';
          break;

        case 116:
          value += '\t';
          break;

        case 117:
          {
            // uXXXX
            var charCode = uniCharCode(body.charCodeAt(position + 1), body.charCodeAt(position + 2), body.charCodeAt(position + 3), body.charCodeAt(position + 4));

            if (charCode < 0) {
              var invalidSequence = body.slice(position + 1, position + 5);
              throw (0, _syntaxError.syntaxError)(source, position, 'Invalid character escape sequence: \\u' + invalidSequence + '.');
            }

            value += String.fromCharCode(charCode);
            position += 4;
            break;
          }

        default:
          // Take the escaped character from the body rather than from
          // String.fromCharCode(code): when the input ends right after the
          // backslash, `code` is NaN and fromCharCode would put a NUL
          // character into the message. slice() yields '' in that case.
          throw (0, _syntaxError.syntaxError)(source, position, 'Invalid character escape sequence: \\' + body.slice(position, position + 1) + '.');
      }

      ++position;
      chunkStart = position;
    }
  }

  throw (0, _syntaxError.syntaxError)(source, position, 'Unterminated string.');
}
/**
 * Reads a block string token from the source file.
 *
 * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
 *
 * Line terminators inside the block string advance `lexer.line` and reset
 * `lexer.lineStart`, so tokens after the block string get correct positions.
 *
 * @param source - object whose `body` string is being lexed
 * @param start - offset of the opening triple quote
 * @param line - line number to record on the token
 * @param col - column number to record on the token
 * @param prev - the preceding token
 * @param lexer - the Lexer whose `line` / `lineStart` are updated
 * @returns a BLOCK_STRING token whose value is the raw content (with `\"""`
 *   replaced by `"""`) passed through dedentBlockStringValue
 * @throws a syntax error for an invalid control character or a block string
 *   that is not closed before the end of the body
 */
function readBlockString(source, start, line, col, prev, lexer) {
  var body = source.body;
  var cursor = start + 3;
  // Start of the pending run of raw characters not yet copied to `raw`.
  var pendingFrom = cursor;
  var code = 0;
  var raw = '';

  while (cursor < body.length && !isNaN(code = body.charCodeAt(cursor))) {
    var quotesAhead = body.charCodeAt(cursor + 1) === 34 && body.charCodeAt(cursor + 2) === 34;

    // Closing Triple-Quote (""")
    if (code === 34 && quotesAhead) {
      raw += body.slice(pendingFrom, cursor);
      return new _ast.Token(_tokenKind.TokenKind.BLOCK_STRING, start, cursor + 3, line, col, prev, (0, _blockString.dedentBlockStringValue)(raw));
    }

    // Line terminators: \n, \r and \r\n (the pair counts as one line break).
    if (code === 10 || code === 13) {
      cursor += (code === 13 && body.charCodeAt(cursor + 1) === 10) ? 2 : 1;
      lexer.line += 1;
      lexer.lineStart = cursor;
      continue;
    }

    // SourceCharacter: tab is the only other control character allowed.
    if (code < 0x0020 && code !== 0x0009) {
      throw (0, _syntaxError.syntaxError)(source, cursor, 'Invalid character within String: ' + printCharCode(code) + '.');
    }

    // Escape Triple-Quote (\""")
    if (code === 92 && body.charCodeAt(cursor + 1) === 34 && body.charCodeAt(cursor + 2) === 34 && body.charCodeAt(cursor + 3) === 34) {
      raw += body.slice(pendingFrom, cursor) + '"""';
      cursor += 4;
      pendingFrom = cursor;
      continue;
    }

    cursor += 1;
  }

  throw (0, _syntaxError.syntaxError)(source, cursor, 'Unterminated string.');
}
/**
 * Combines the char codes of four hexadecimal digits into the integer they
 * spell out, most significant digit first. For example the char codes of
 * '0','0','0','f' give 15, and those of '0','0','f','f' give 255.
 *
 * Returns a negative number if any of the four is not a hexadecimal digit:
 * char2hex() returns -1 on error, and ORing -1 (shifted or not) into the
 * result leaves the sign bit set.
 */
function uniCharCode(a, b, c, d) {
  var digits = [char2hex(a), char2hex(b), char2hex(c), char2hex(d)];
  var combined = 0;

  // Shift-and-OR fold; equal to a << 12 | b << 8 | c << 4 | d.
  for (var i = 0; i < digits.length; i++) {
    combined = combined << 4 | digits[i];
  }

  return combined;
}
/**
 * Converts the char code of a hexadecimal digit to its integer value.
 * '0' becomes 0, '9' becomes 9
 * 'A' becomes 10, 'F' becomes 15
 * 'a' becomes 10, 'f' becomes 15
 *
 * Returns -1 for any other input, including NaN.
 */
function char2hex(a) {
  // 0-9
  if (a >= 48 && a <= 57) {
    return a - 48;
  }

  // A-F
  if (a >= 65 && a <= 70) {
    return a - 55;
  }

  // a-f
  if (a >= 97 && a <= 102) {
    return a - 87;
  }

  return -1;
}
/**
 * Reads an alphanumeric + underscore name from the source.
 *
 * [_A-Za-z][_0-9A-Za-z]*
 *
 * The character at `start` is taken as already validated; only the
 * characters after it are inspected.
 *
 * @param source - object whose `body` string is being lexed
 * @param start - offset of the first character of the name
 * @param line - line number to record on the token
 * @param col - column number to record on the token
 * @param prev - the preceding token
 * @returns a NAME token whose value is the name's source text
 */
function readName(source, start, line, col, prev) {
  var body = source.body;
  var bodyLength = body.length;
  var end = start + 1;

  for (;;) {
    if (end === bodyLength) {
      break;
    }

    var code = body.charCodeAt(end);

    if (isNaN(code)) {
      break;
    }

    var continuesName = code === 95 || // _
    code >= 48 && code <= 57 || // 0-9
    code >= 65 && code <= 90 || // A-Z
    code >= 97 && code <= 122; // a-z

    if (!continuesName) {
      break;
    }

    end += 1;
  }

  return new _ast.Token(_tokenKind.TokenKind.NAME, start, end, line, col, prev, body.slice(start, end));
} // _ A-Z a-z
686
687
/**
 * Reports whether a char code may begin a name: `_`, `A`-`Z` or `a`-`z`.
 *
 * @param code - a char code as returned by String.prototype.charCodeAt
 * @returns true for a name-start character, false otherwise (including NaN)
 */
function isNameStart(code) {
  // _
  if (code === 95) {
    return true;
  }

  // A-Z
  if (code >= 65 && code <= 90) {
    return true;
  }

  // a-z
  return code >= 97 && code <= 122;
}