"use strict";

// CommonJS module preamble: mark the module as a transpiled ES module and
// declare its two public exports. `Lexer` is assigned for real further down,
// once the class has been built.
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.isPunctuatorTokenKind = isPunctuatorTokenKind;
exports.Lexer = void 0;

// Project-local dependencies used by the lexer functions below.
var _syntaxError = require("../error/syntaxError.js");

var _ast = require("./ast.js");

var _tokenKind = require("./tokenKind.js");

var _blockString = require("./blockString.js");

/**
 * A Lexer is a stateful cursor over the tokens of a source.
 *
 * Instance fields set by the constructor:
 *  - `source`:    the source object passed in (its `body` string is what
 *                 `readToken` scans).
 *  - `lastToken`: the token that was current before the latest `advance()`.
 *  - `token`:     the current token; starts as a synthetic SOF token.
 *  - `line`:      current 1-indexed line number, updated while scanning.
 *  - `lineStart`: character offset at which the current line begins.
 */
var Lexer = function () {
  /**
   * @param source object whose text is to be tokenised; stored as-is.
   */
  function Lexer(source) {
    // Synthetic start-of-file token: zero-width at offset 0, no predecessor.
    var startOfFileToken = new _ast.Token(_tokenKind.TokenKind.SOF, 0, 0, 0, 0, null);
    this.source = source;
    this.lastToken = startOfFileToken;
    this.token = startOfFileToken;
    this.line = 1;
    this.lineStart = 0;
  }

  var _proto = Lexer.prototype;

  /**
   * Moves the current token to the next non-comment token and returns it.
   * The previous current token is remembered in `lastToken`.
   */
  _proto.advance = function advance() {
    this.lastToken = this.token;
    var token = this.lookahead();
    this.token = token;
    return token;
  };

  /**
   * Returns the next non-comment token after the current one without
   * changing `this.token`. Tokens are linked through their `next` property,
   * so a token that has already been read is reused instead of re-scanned.
   * Once the current token is EOF, that same token is returned.
   */
  _proto.lookahead = function lookahead() {
    var token = this.token;

    if (token.kind !== _tokenKind.TokenKind.EOF) {
      do {
        var next = token.next;

        // Only scan when the successor has not been materialised yet
        // (`next` is null or undefined); cache it on the token chain.
        if (next === null || next === void 0) {
          next = token.next = readToken(this, token);
        }

        token = next;
      } while (token.kind === _tokenKind.TokenKind.COMMENT);
    }

    return token;
  };

  return Lexer;
}();

// Publish the Lexer class now that it has been constructed.
exports.Lexer = Lexer;

/**
 * Reports whether a token kind is one of the punctuator kinds.
 *
 * @param kind a token kind value to test.
 * @returns {boolean} true when `kind` is strictly equal to one of the
 *   fourteen punctuator TokenKind values listed below.
 */
function isPunctuatorTokenKind(kind) {
  var TokenKind = _tokenKind.TokenKind;
  return (
    kind === TokenKind.BANG ||
    kind === TokenKind.DOLLAR ||
    kind === TokenKind.AMP ||
    kind === TokenKind.PAREN_L ||
    kind === TokenKind.PAREN_R ||
    kind === TokenKind.SPREAD ||
    kind === TokenKind.COLON ||
    kind === TokenKind.EQUALS ||
    kind === TokenKind.AT ||
    kind === TokenKind.BRACKET_L ||
    kind === TokenKind.BRACKET_R ||
    kind === TokenKind.BRACE_L ||
    kind === TokenKind.PIPE ||
    kind === TokenKind.BRACE_R
  );
}

/**
 * Formats a UTF-16 code unit for inclusion in an error message.
 *
 * @param {number} code value as returned by `String.prototype.charCodeAt`.
 * @returns the EOF token kind when `code` is NaN (charCodeAt yields NaN past
 *   the end of a string), the JSON-quoted character when `code` is below
 *   0x7F, and otherwise a quoted `\uXXXX` escape with upper-case hex digits.
 */
function printCharCode(code) {
  if (isNaN(code)) {
    return _tokenKind.TokenKind.EOF;
  }

  if (code < 0x007f) {
    // JSON.stringify both quotes the character and escapes control chars.
    return JSON.stringify(String.fromCharCode(code));
  }

  // Left-pad to four hex digits; values here are at least two digits long.
  var hex = ('00' + code.toString(16).toUpperCase()).slice(-4);
  return "\"\\u".concat(hex, "\"");
}

/**
 * Reads the next token from the lexer's source, starting at `prev.end`.
 *
 * Skips BOM, tab, space and comma characters, and line terminators (which
 * also advance `lexer.line` / `lexer.lineStart`), then dispatches on the
 * first significant character. Comments are returned as tokens; skipping
 * them is left to the caller.
 *
 * @param lexer object providing `source`, and the mutable `line` and
 *   `lineStart` counters.
 * @param prev the previously read token; scanning resumes at `prev.end` and
 *   `prev` is passed on as the new token's predecessor.
 * @returns a new token, or an EOF token positioned at the end of the body
 *   when only ignored characters remain.
 * @throws the error built by `syntaxError` when the character cannot start
 *   any token.
 */
function readToken(lexer, prev) {
  var source = lexer.source;
  var body = source.body;
  var bodyLength = body.length;
  var pos = prev.end;

  while (pos < bodyLength) {
    var code = body.charCodeAt(pos);
    var _line = lexer.line;

    // Columns are 1-indexed relative to the start of the current line.
    var _col = 1 + pos - lexer.lineStart;

    switch (code) {
      // Ignored characters.
      case 0xfeff: // <BOM>
      case 9: // \t
      case 32: // <space>
      case 44: // ,
        ++pos;
        continue;

      case 10: // \n
        ++pos;
        ++lexer.line;
        lexer.lineStart = pos;
        continue;

      case 13: // \r, with \r\n counted as a single line break
        if (body.charCodeAt(pos + 1) === 10) {
          pos += 2;
        } else {
          ++pos;
        }

        ++lexer.line;
        lexer.lineStart = pos;
        continue;

      case 33: // !
        return new _ast.Token(_tokenKind.TokenKind.BANG, pos, pos + 1, _line, _col, prev);

      case 35: // #
        return readComment(source, pos, _line, _col, prev);

      case 36: // $
        return new _ast.Token(_tokenKind.TokenKind.DOLLAR, pos, pos + 1, _line, _col, prev);

      case 38: // &
        return new _ast.Token(_tokenKind.TokenKind.AMP, pos, pos + 1, _line, _col, prev);

      case 40: // (
        return new _ast.Token(_tokenKind.TokenKind.PAREN_L, pos, pos + 1, _line, _col, prev);

      case 41: // )
        return new _ast.Token(_tokenKind.TokenKind.PAREN_R, pos, pos + 1, _line, _col, prev);

      case 46: // . (only valid as part of "...")
        if (body.charCodeAt(pos + 1) === 46 && body.charCodeAt(pos + 2) === 46) {
          return new _ast.Token(_tokenKind.TokenKind.SPREAD, pos, pos + 3, _line, _col, prev);
        }

        // Fall out of the switch to the syntax error below.
        break;

      case 58: // :
        return new _ast.Token(_tokenKind.TokenKind.COLON, pos, pos + 1, _line, _col, prev);

      case 61: // =
        return new _ast.Token(_tokenKind.TokenKind.EQUALS, pos, pos + 1, _line, _col, prev);

      case 64: // @
        return new _ast.Token(_tokenKind.TokenKind.AT, pos, pos + 1, _line, _col, prev);

      case 91: // [
        return new _ast.Token(_tokenKind.TokenKind.BRACKET_L, pos, pos + 1, _line, _col, prev);

      case 93: // ]
        return new _ast.Token(_tokenKind.TokenKind.BRACKET_R, pos, pos + 1, _line, _col, prev);

      case 123: // {
        return new _ast.Token(_tokenKind.TokenKind.BRACE_L, pos, pos + 1, _line, _col, prev);

      case 124: // |
        return new _ast.Token(_tokenKind.TokenKind.PIPE, pos, pos + 1, _line, _col, prev);

      case 125: // }
        return new _ast.Token(_tokenKind.TokenKind.BRACE_R, pos, pos + 1, _line, _col, prev);

      case 34: // " (three in a row open a block string)
        if (body.charCodeAt(pos + 1) === 34 && body.charCodeAt(pos + 2) === 34) {
          return readBlockString(source, pos, _line, _col, prev, lexer);
        }

        return readString(source, pos, _line, _col, prev);

      // "-" and the digits 0-9 start a number.
      case 45:
      case 48:
      case 49:
      case 50:
      case 51:
      case 52:
      case 53:
      case 54:
      case 55:
      case 56:
      case 57:
        return readNumber(source, pos, code, _line, _col, prev);

      // A-Z start a name.
      case 65:
      case 66:
      case 67:
      case 68:
      case 69:
      case 70:
      case 71:
      case 72:
      case 73:
      case 74:
      case 75:
      case 76:
      case 77:
      case 78:
      case 79:
      case 80:
      case 81:
      case 82:
      case 83:
      case 84:
      case 85:
      case 86:
      case 87:
      case 88:
      case 89:
      case 90:
      // _ starts a name.
      case 95:
      // a-z start a name.
      case 97:
      case 98:
      case 99:
      case 100:
      case 101:
      case 102:
      case 103:
      case 104:
      case 105:
      case 106:
      case 107:
      case 108:
      case 109:
      case 110:
      case 111:
      case 112:
      case 113:
      case 114:
      case 115:
      case 116:
      case 117:
      case 118:
      case 119:
      case 120:
      case 121:
      case 122:
        return readName(source, pos, _line, _col, prev);
    }

    throw (0, _syntaxError.syntaxError)(source, pos, unexpectedCharacterMessage(code));
  }

  // Only ignored characters (or nothing) remained: emit a zero-width EOF.
  var line = lexer.line;
  var col = 1 + pos - lexer.lineStart;
  return new _ast.Token(_tokenKind.TokenKind.EOF, bodyLength, bodyLength, line, col, prev);
}

/**
 * Builds the error message for a character that cannot start a token.
 *
 * @param {number} code the offending UTF-16 code unit.
 * @returns {string} one of three messages: for control characters below
 *   0x20 other than tab, line feed and carriage return; for a single quote
 *   (with a hint to use a double quote); or a generic message otherwise.
 */
function unexpectedCharacterMessage(code) {
  var isDisallowedControl = code < 0x0020 && code !== 0x0009 && code !== 0x000a && code !== 0x000d;

  if (isDisallowedControl) {
    return "Cannot contain the invalid character ".concat(printCharCode(code), ".");
  }

  if (code === 39) {
    // ' is a common mistake for string delimiters, so give a targeted hint.
    return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
  }

  return "Cannot parse the unexpected character ".concat(printCharCode(code), ".");
}

/**
 * Reads a comment token.
 *
 * The comment extends from `start` up to (not including) the first
 * character whose code is 0x1F or lower and is not a tab, or to the end of
 * the body. The token value omits the character at `start` itself.
 *
 * @param source object whose `body` string is being scanned.
 * @param {number} start index of the comment's first character.
 * @param {number} line line number recorded on the token.
 * @param {number} col column number recorded on the token.
 * @param prev the preceding token.
 * @returns a COMMENT token spanning `start`..end of comment.
 */
function readComment(source, start, line, col, prev) {
  var body = source.body;
  var position = start + 1;
  var code = body.charCodeAt(position);

  // charCodeAt returns NaN past the end of the string, which stops the scan.
  while (!isNaN(code) && (code > 0x001f || code === 0x0009)) {
    code = body.charCodeAt(++position);
  }

  return new _ast.Token(_tokenKind.TokenKind.COMMENT, start, position, line, col, prev, body.slice(start + 1, position));
}

/**
 * Reads an integer or float token.
 *
 * Accepted shape: an optional "-", then either a lone "0" or a run of
 * digits, then an optional fraction ("." followed by digits), then an
 * optional exponent ("e"/"E", optional sign, digits). The token is FLOAT
 * when a fraction or exponent is present and INT otherwise.
 *
 * @param source object whose `body` string is being scanned.
 * @param {number} start index of the first character of the number.
 * @param {number} firstCode char code at `start`.
 * @param {number} line line number recorded on the token.
 * @param {number} col column number recorded on the token.
 * @param prev the preceding token.
 * @returns an INT or FLOAT token whose value is the raw source text.
 * @throws the error built by `syntaxError` when a digit follows a leading
 *   0, when digits are missing, or when the number is immediately followed
 *   by "." or a name-start character.
 */
function readNumber(source, start, firstCode, line, col, prev) {
  var body = source.body;
  var code = firstCode;
  var position = start;
  var isFloat = false;

  if (code === 45) {
    // -
    code = body.charCodeAt(++position);
  }

  if (code === 48) {
    // 0 may not be followed by further digits (no leading zeros).
    code = body.charCodeAt(++position);

    if (code >= 48 && code <= 57) {
      throw (0, _syntaxError.syntaxError)(source, position, "Invalid number, unexpected digit after 0: ".concat(printCharCode(code), "."));
    }
  } else {
    position = readDigits(source, position, code);
    code = body.charCodeAt(position);
  }

  if (code === 46) {
    // . introduces the fractional part.
    isFloat = true;
    code = body.charCodeAt(++position);
    position = readDigits(source, position, code);
    code = body.charCodeAt(position);
  }

  if (code === 69 || code === 101) {
    // E or e introduces the exponent.
    isFloat = true;
    code = body.charCodeAt(++position);

    if (code === 43 || code === 45) {
      // + or -
      code = body.charCodeAt(++position);
    }

    position = readDigits(source, position, code);
    code = body.charCodeAt(position);
  }

  // A number must not run straight into "." or the start of a name.
  if (code === 46 || isNameStart(code)) {
    throw (0, _syntaxError.syntaxError)(source, position, "Invalid number, expected digit but got: ".concat(printCharCode(code), "."));
  }

  return new _ast.Token(isFloat ? _tokenKind.TokenKind.FLOAT : _tokenKind.TokenKind.INT, start, position, line, col, prev, body.slice(start, position));
}

/**
 * Consumes a run of one or more decimal digits.
 *
 * @param source object whose `body` string is being scanned.
 * @param {number} start index of the first expected digit.
 * @param {number} firstCode char code at `start`.
 * @returns {number} index of the first character after the digit run.
 * @throws the error built by `syntaxError` when `firstCode` is not a digit.
 */
function readDigits(source, start, firstCode) {
  var body = source.body;
  var position = start;
  var code = firstCode;

  // At least one digit (0-9) is required.
  if (!(code >= 48 && code <= 57)) {
    throw (0, _syntaxError.syntaxError)(source, position, "Invalid number, expected digit but got: ".concat(printCharCode(code), "."));
  }

  do {
    code = body.charCodeAt(++position);
  } while (code >= 48 && code <= 57);

  return position;
}

/**
 * Reads a single-line, double-quoted string token.
 *
 * Literal text is copied in chunks; each backslash escape flushes the
 * pending chunk and appends the decoded character. Supported escapes are
 * \" \/ \\ \b \f \n \r \t and \uXXXX (four hex digits).
 *
 * @param source object whose `body` string is being scanned.
 * @param {number} start index of the opening quote.
 * @param {number} line line number recorded on the token.
 * @param {number} col column number recorded on the token.
 * @param prev the preceding token.
 * @returns a STRING token whose value is the decoded string contents.
 * @throws the error built by `syntaxError` for a control character inside
 *   the string, an invalid escape sequence, or a string that reaches a line
 *   terminator or the end of the body before its closing quote.
 */
function readString(source, start, line, col, prev) {
  var body = source.body;
  var position = start + 1;
  var chunkStart = position;
  var code = 0;
  var value = '';

  // Stop at end of input or at a line terminator (\n or \r).
  while (position < body.length && !isNaN(code = body.charCodeAt(position)) && code !== 0x000a && code !== 0x000d) {
    if (code === 34) {
      // Closing quote: flush the last chunk and finish.
      value += body.slice(chunkStart, position);
      return new _ast.Token(_tokenKind.TokenKind.STRING, start, position + 1, line, col, prev, value);
    }

    // Control characters other than tab are not allowed in a string.
    if (code < 0x0020 && code !== 0x0009) {
      throw (0, _syntaxError.syntaxError)(source, position, "Invalid character within String: ".concat(printCharCode(code), "."));
    }

    ++position;

    if (code === 92) {
      // Backslash: flush text before it, then decode the escape that follows.
      value += body.slice(chunkStart, position - 1);
      code = body.charCodeAt(position);

      switch (code) {
        case 34:
          value += '"';
          break;

        case 47:
          value += '/';
          break;

        case 92:
          value += '\\';
          break;

        case 98:
          value += '\b';
          break;

        case 102:
          value += '\f';
          break;

        case 110:
          value += '\n';
          break;

        case 114:
          value += '\r';
          break;

        case 116:
          value += '\t';
          break;

        case 117:
          {
            // uXXXX: a negative result means one of the four was not hex.
            var charCode = uniCharCode(body.charCodeAt(position + 1), body.charCodeAt(position + 2), body.charCodeAt(position + 3), body.charCodeAt(position + 4));

            if (charCode < 0) {
              var invalidSequence = body.slice(position + 1, position + 5);
              throw (0, _syntaxError.syntaxError)(source, position, "Invalid character escape sequence: \\u".concat(invalidSequence, "."));
            }

            value += String.fromCharCode(charCode);
            position += 4;
            break;
          }

        default:
          throw (0, _syntaxError.syntaxError)(source, position, "Invalid character escape sequence: \\".concat(String.fromCharCode(code), "."));
      }

      // Step past the escape's final character and start a new chunk.
      ++position;
      chunkStart = position;
    }
  }

  throw (0, _syntaxError.syntaxError)(source, position, 'Unterminated string.');
}

/**
 * Reads a triple-quoted block string token.
 *
 * Line terminators inside the block advance `lexer.line` and
 * `lexer.lineStart`. The sequence \""" is the only escape and yields a
 * literal """. The collected raw text is passed through
 * `dedentBlockStringValue` before being stored on the token.
 *
 * @param source object whose `body` string is being scanned.
 * @param {number} start index of the first of the three opening quotes.
 * @param {number} line line number recorded on the token.
 * @param {number} col column number recorded on the token.
 * @param prev the preceding token.
 * @param lexer the lexer whose line counters are updated while scanning.
 * @returns a BLOCK_STRING token ending just past the closing quotes.
 * @throws the error built by `syntaxError` for a disallowed control
 *   character, or when the body ends before the closing quotes.
 */
function readBlockString(source, start, line, col, prev, lexer) {
  var body = source.body;
  var position = start + 3;
  var chunkStart = position;
  var code = 0;
  var rawValue = '';

  while (position < body.length && !isNaN(code = body.charCodeAt(position))) {
    // Closing triple quote: flush the last chunk and finish.
    if (code === 34 && body.charCodeAt(position + 1) === 34 && body.charCodeAt(position + 2) === 34) {
      rawValue += body.slice(chunkStart, position);
      return new _ast.Token(_tokenKind.TokenKind.BLOCK_STRING, start, position + 3, line, col, prev, (0, _blockString.dedentBlockStringValue)(rawValue));
    }

    // Control characters other than tab, \n and \r are not allowed.
    if (code < 0x0020 && code !== 0x0009 && code !== 0x000a && code !== 0x000d) {
      throw (0, _syntaxError.syntaxError)(source, position, "Invalid character within String: ".concat(printCharCode(code), "."));
    }

    if (code === 10) {
      // \n
      ++position;
      ++lexer.line;
      lexer.lineStart = position;
    } else if (code === 13) {
      // \r, with \r\n counted as a single line break
      if (body.charCodeAt(position + 1) === 10) {
        position += 2;
      } else {
        ++position;
      }

      ++lexer.line;
      lexer.lineStart = position;
    } else if (code === 92 && body.charCodeAt(position + 1) === 34 && body.charCodeAt(position + 2) === 34 && body.charCodeAt(position + 3) === 34) {
      // Escaped triple quote (\"""): emit """ without the backslash.
      rawValue += body.slice(chunkStart, position) + '"""';
      position += 4;
      chunkStart = position;
    } else {
      ++position;
    }
  }

  throw (0, _syntaxError.syntaxError)(source, position, 'Unterminated string.');
}

/**
 * Combines four hexadecimal digit char codes into one 16-bit code unit.
 *
 * @param {number} a char code of the most significant hex digit.
 * @param {number} b char code of the second hex digit.
 * @param {number} c char code of the third hex digit.
 * @param {number} d char code of the least significant hex digit.
 * @returns {number} the combined value; negative when any input is not a
 *   hex digit, because `char2hex` returns -1 for it and OR-ing a shifted -1
 *   leaves the sign bit set.
 */
function uniCharCode(a, b, c, d) {
  var high = char2hex(a) << 12 | char2hex(b) << 8;
  var low = char2hex(c) << 4 | char2hex(d);
  return high | low;
}

/**
 * Converts the char code of one hexadecimal digit to its numeric value.
 *
 * @param {number} a char code to convert.
 * @returns {number} 0-9 for "0"-"9", 10-15 for "A"-"F" or "a"-"f", and -1
 *   for anything else (including NaN).
 */
function char2hex(a) {
  if (a >= 48 && a <= 57) {
    return a - 48; // 0-9
  }

  if (a >= 65 && a <= 70) {
    return a - 55; // A-F
  }

  if (a >= 97 && a <= 102) {
    return a - 87; // a-f
  }

  return -1;
}

/**
 * Reads a name token.
 *
 * The character at `start` is accepted without inspection; the name then
 * continues for as long as characters are "_", 0-9, A-Z or a-z.
 *
 * @param source object whose `body` string is being scanned.
 * @param {number} start index of the name's first character.
 * @param {number} line line number recorded on the token.
 * @param {number} col column number recorded on the token.
 * @param prev the preceding token.
 * @returns a NAME token whose value is the matched source text.
 */
function readName(source, start, line, col, prev) {
  var body = source.body;
  var bodyLength = body.length;
  var position = start + 1;
  var code = 0;

  while (
    position !== bodyLength &&
    !isNaN(code = body.charCodeAt(position)) &&
    (code === 95 || // _
      code >= 48 && code <= 57 || // 0-9
      code >= 65 && code <= 90 || // A-Z
      code >= 97 && code <= 122) // a-z
  ) {
    ++position;
  }

  return new _ast.Token(_tokenKind.TokenKind.NAME, start, position, line, col, prev, body.slice(start, position));
}

/**
 * Reports whether a char code may begin a name: "_", A-Z or a-z.
 *
 * @param {number} code char code to test.
 * @returns {boolean} true for a name-start character, false otherwise
 *   (including digits and NaN).
 */
function isNameStart(code) {
  var isUnderscore = code === 95;
  var isUpper = code >= 65 && code <= 90;
  var isLower = code >= 97 && code <= 122;
  return isUnderscore || isUpper || isLower;
}