1 | import { syntaxError } from "../error/syntaxError.mjs";
|
2 | import { Token } from "./ast.mjs";
|
3 | import { TokenKind } from "./tokenKind.mjs";
|
4 | import { dedentBlockStringValue } from "./blockString.mjs";
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
/**
 * Lexer state for one source document.
 *
 * Holds the source being scanned, the most recently returned token, the
 * token before it, and the line bookkeeping (`line`, `lineStart`) that
 * `readToken` and `readBlockString` update as they consume line terminators.
 */
export class Lexer {
  /**
   * @param source - Object whose `body` string is scanned by `readToken`.
   */
  constructor(source) {
    // Both `token` and `lastToken` start out as the same TokenKind.SOF token.
    const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0, null);
    this.source = source;
    this.lastToken = startOfFileToken;
    this.token = startOfFileToken;
    this.line = 1;
    this.lineStart = 0;
  }

  /**
   * Moves the lexer to the next non-comment token.
   *
   * @returns The new current token (also stored in `this.token`).
   */
  advance() {
    this.lastToken = this.token;
    const token = this.lookahead();
    this.token = token;
    return token;
  }

  /**
   * Finds the next non-comment token after `this.token` without changing
   * `this.token` or `this.lastToken`. Tokens that are read are cached on the
   * `next` property of their predecessor, so repeated calls reuse them.
   *
   * @returns The next token whose kind is not TokenKind.COMMENT, or the
   *   current token when it is already of kind TokenKind.EOF.
   */
  lookahead() {
    let token = this.token;

    if (token.kind !== TokenKind.EOF) {
      do {
        const cached = token.next;
        // Reuse an already-linked token; otherwise read one and link it.
        token =
          cached !== null && cached !== undefined
            ? cached
            : (token.next = readToken(this, token));
      } while (token.kind === TokenKind.COMMENT);
    }

    return token;
  }
}
|
73 |
|
74 |
|
75 |
|
76 |
|
/**
 * Reports whether `kind` is strictly equal to one of the punctuator members
 * of TokenKind listed below.
 *
 * @param kind - Value to compare against the TokenKind punctuator members.
 * @returns {boolean} true when `kind` matches one of them.
 */
export function isPunctuatorTokenKind(kind) {
  switch (kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;
    default:
      return false;
  }
}
|
80 |
|
/**
 * Formats a UTF-16 code unit for use in an error message.
 *
 * @param {number} code - Result of `String.prototype.charCodeAt`; NaN when
 *   the position was past the end of the string.
 * @returns TokenKind.EOF for NaN; a JSON-quoted character for codes below
 *   0x7F; otherwise a quoted `\uXXXX` escape with upper-case hex digits.
 */
function printCharCode(code) {
  if (Number.isNaN(code)) {
    return TokenKind.EOF;
  }

  if (code < 0x007f) {
    // JSON.stringify supplies the quotes and escapes control characters.
    return JSON.stringify(String.fromCharCode(code));
  }

  // Left-pad to four hex digits (codes reaching here have at least two).
  const hex = `00${code.toString(16).toUpperCase()}`.slice(-4);
  return `"\\u${hex}"`;
}
|
88 |
|
89 |
|
90 |
|
91 |
|
92 |
|
93 |
|
94 |
|
95 |
|
96 |
|
/**
 * Reads the token that follows `prev` in the lexer's source body.
 *
 * Skips BOM (0xFEFF), tab, space and comma characters, and consumes line
 * terminators while updating `lexer.line` / `lexer.lineStart`. Dispatches
 * on the first significant code unit to build a punctuator token or to
 * delegate to readComment, readString, readBlockString, readNumber or
 * readName.
 *
 * @param lexer - Lexer whose `source`, `line` and `lineStart` are used;
 *   `line` and `lineStart` are mutated when line terminators are consumed.
 * @param prev - Previous token; scanning starts at `prev.end`.
 * @returns The next Token, or a TokenKind.EOF token at the end of the body.
 * @throws The error built by `syntaxError` when an unexpected character is
 *   encountered.
 */
function readToken(lexer, prev) {
  const source = lexer.source;
  const body = source.body;
  const bodyLength = body.length;
  let pos = prev.end;

  while (pos < bodyLength) {
    const code = body.charCodeAt(pos);
    const line = lexer.line;
    const col = 1 + pos - lexer.lineStart;

    switch (code) {
      case 0xfeff: // <BOM>
      case 9: // \t
      case 32: // <space>
      case 44: // ,
        ++pos;
        continue;

      case 10: // \n
        ++pos;
        ++lexer.line;
        lexer.lineStart = pos;
        continue;

      case 13: // \r, optionally followed by \n (counted as one line break)
        pos += body.charCodeAt(pos + 1) === 10 ? 2 : 1;
        ++lexer.line;
        lexer.lineStart = pos;
        continue;

      case 33: // !
        return new Token(TokenKind.BANG, pos, pos + 1, line, col, prev);

      case 35: // #
        return readComment(source, pos, line, col, prev);

      case 36: // $
        return new Token(TokenKind.DOLLAR, pos, pos + 1, line, col, prev);

      case 38: // &
        return new Token(TokenKind.AMP, pos, pos + 1, line, col, prev);

      case 40: // (
        return new Token(TokenKind.PAREN_L, pos, pos + 1, line, col, prev);

      case 41: // )
        return new Token(TokenKind.PAREN_R, pos, pos + 1, line, col, prev);

      case 46: // .
        if (
          body.charCodeAt(pos + 1) === 46 &&
          body.charCodeAt(pos + 2) === 46
        ) {
          return new Token(TokenKind.SPREAD, pos, pos + 3, line, col, prev);
        }

        // A lone "." is not a token; fall out of the switch to the throw.
        break;

      case 58: // :
        return new Token(TokenKind.COLON, pos, pos + 1, line, col, prev);

      case 61: // =
        return new Token(TokenKind.EQUALS, pos, pos + 1, line, col, prev);

      case 64: // @
        return new Token(TokenKind.AT, pos, pos + 1, line, col, prev);

      case 91: // [
        return new Token(TokenKind.BRACKET_L, pos, pos + 1, line, col, prev);

      case 93: // ]
        return new Token(TokenKind.BRACKET_R, pos, pos + 1, line, col, prev);

      case 123: // {
        return new Token(TokenKind.BRACE_L, pos, pos + 1, line, col, prev);

      case 124: // |
        return new Token(TokenKind.PIPE, pos, pos + 1, line, col, prev);

      case 125: // }
        return new Token(TokenKind.BRACE_R, pos, pos + 1, line, col, prev);

      case 34: // "
        if (
          body.charCodeAt(pos + 1) === 34 &&
          body.charCodeAt(pos + 2) === 34
        ) {
          return readBlockString(source, pos, line, col, prev, lexer);
        }

        return readString(source, pos, line, col, prev);
    }

    // "-" or 0-9 starts a number.
    if (code === 45 || (code >= 48 && code <= 57)) {
      return readNumber(source, pos, code, line, col, prev);
    }

    // A-Z, "_" or a-z starts a name.
    if (isNameStart(code)) {
      return readName(source, pos, line, col, prev);
    }

    throw syntaxError(source, pos, unexpectedCharacterMessage(code));
  }

  const eofLine = lexer.line;
  const eofCol = 1 + pos - lexer.lineStart;
  return new Token(
    TokenKind.EOF,
    bodyLength,
    bodyLength,
    eofLine,
    eofCol,
    prev,
  );
}
|
353 |
|
354 |
|
355 |
|
356 |
|
357 |
|
/**
 * Builds the error message for a character that cannot start a token.
 *
 * @param {number} code - UTF-16 code unit of the offending character.
 * @returns {string} A message that distinguishes control characters (other
 *   than tab, line feed and carriage return), the single quote, and every
 *   other character.
 */
function unexpectedCharacterMessage(code) {
  const isDisallowedControl =
    code < 0x0020 && code !== 0x0009 && code !== 0x000a && code !== 0x000d;

  if (isDisallowedControl) {
    return `Cannot contain the invalid character ${printCharCode(code)}.`;
  }

  if (code === 39) {
    // ' — give a targeted hint instead of the generic message.
    return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
  }

  return `Cannot parse the unexpected character ${printCharCode(code)}.`;
}
|
370 |
|
371 |
|
372 |
|
373 |
|
374 |
|
375 |
|
376 |
|
/**
 * Reads a comment token starting at the "#" found at `start`.
 *
 * The comment extends until the end of the body or the first code unit
 * that is below 0x20 and is not a tab.
 *
 * @param source - Object whose `body` string is scanned.
 * @param {number} start - Index of the "#" character.
 * @param line - Line value passed through to the Token.
 * @param col - Column value passed through to the Token.
 * @param prev - Previous token, passed through to the Token.
 * @returns A TokenKind.COMMENT Token whose value is the text after "#".
 */
function readComment(source, start, line, col, prev) {
  const body = source.body;
  let position = start;
  let code;

  do {
    code = body.charCodeAt(++position);
  } while (
    // charCodeAt yields NaN past the end of the body.
    !Number.isNaN(code) &&
    (code > 0x001f || code === 0x0009)
  );

  const value = body.slice(start + 1, position);
  return new Token(TokenKind.COMMENT, start, position, line, col, prev, value);
}
|
389 |
|
390 |
|
391 |
|
392 |
|
393 |
|
394 |
|
395 |
|
396 |
|
397 |
|
/**
 * Reads an integer or float token starting at `start`.
 *
 * Accepts an optional leading "-", then either a single "0" or a run of
 * digits, then an optional fractional part ("." digits) and an optional
 * exponent ("e"/"E", optional sign, digits). A fractional part or exponent
 * makes the token a float.
 *
 * @param source - Object whose `body` string is scanned.
 * @param {number} start - Index of the first character of the number.
 * @param {number} firstCode - Code unit at `start`.
 * @param line - Line value passed through to the Token.
 * @param col - Column value passed through to the Token.
 * @param prev - Previous token, passed through to the Token.
 * @returns A TokenKind.INT or TokenKind.FLOAT Token whose value is the
 *   matched source text.
 * @throws The error built by `syntaxError` when a digit follows a leading
 *   "0", when a required digit is missing, or when the number is directly
 *   followed by "." or a name-start character.
 */
function readNumber(source, start, firstCode, line, col, prev) {
  const body = source.body;
  let code = firstCode;
  let position = start;
  let isFloat = false;

  if (code === 45) {
    // -
    code = body.charCodeAt(++position);
  }

  if (code === 48) {
    // 0 — must not be followed by another digit.
    code = body.charCodeAt(++position);

    if (code >= 48 && code <= 57) {
      throw syntaxError(
        source,
        position,
        `Invalid number, unexpected digit after 0: ${printCharCode(code)}.`,
      );
    }
  } else {
    position = readDigits(source, position, code);
    code = body.charCodeAt(position);
  }

  if (code === 46) {
    // .
    isFloat = true;
    code = body.charCodeAt(++position);
    position = readDigits(source, position, code);
    code = body.charCodeAt(position);
  }

  if (code === 69 || code === 101) {
    // E e
    isFloat = true;
    code = body.charCodeAt(++position);

    if (code === 43 || code === 45) {
      // + -
      code = body.charCodeAt(++position);
    }

    position = readDigits(source, position, code);
    code = body.charCodeAt(position);
  }

  // A number may not be immediately followed by "." or a name-start character.
  if (code === 46 || isNameStart(code)) {
    throw syntaxError(
      source,
      position,
      `Invalid number, expected digit but got: ${printCharCode(code)}.`,
    );
  }

  const kind = isFloat ? TokenKind.FLOAT : TokenKind.INT;
  return new Token(
    kind,
    start,
    position,
    line,
    col,
    prev,
    body.slice(start, position),
  );
}
|
450 |
|
451 |
|
452 |
|
453 |
|
454 |
|
/**
 * Consumes a run of one or more ASCII digits.
 *
 * @param source - Object whose `body` string is scanned.
 * @param {number} start - Index of the first expected digit.
 * @param {number} firstCode - Code unit at `start`.
 * @returns {number} Index of the first code unit after the digit run.
 * @throws The error built by `syntaxError` when `firstCode` is not a digit.
 */
function readDigits(source, start, firstCode) {
  // Anything outside 0-9 (including NaN at end of input) is rejected.
  if (!(firstCode >= 48 && firstCode <= 57)) {
    throw syntaxError(
      source,
      start,
      `Invalid number, expected digit but got: ${printCharCode(firstCode)}.`,
    );
  }

  const body = source.body;
  let position = start;
  let code;

  do {
    code = body.charCodeAt(++position);
  } while (code >= 48 && code <= 57);

  return position;
}
|
472 |
|
473 |
|
474 |
|
475 |
|
476 |
|
477 |
|
478 |
|
/**
 * Reads a single-line, double-quoted string token starting at `start`.
 *
 * Scans until the closing quote, decoding the escapes \" \/ \\ \b \f \n \r
 * \t and \uXXXX. Scanning stops at a line feed, a carriage return or the end
 * of the body, in which case the string is unterminated.
 *
 * @param source - Object whose `body` string is scanned.
 * @param {number} start - Index of the opening quote.
 * @param line - Line value passed through to the Token.
 * @param col - Column value passed through to the Token.
 * @param prev - Previous token, passed through to the Token.
 * @returns A TokenKind.STRING Token whose value is the decoded contents.
 * @throws The error built by `syntaxError` for a control character other
 *   than tab, an invalid escape sequence, or an unterminated string.
 */
function readString(source, start, line, col, prev) {
  const body = source.body;
  let position = start + 1;
  // Start of the not-yet-copied run of literal characters.
  let chunkStart = position;
  let code = 0;
  let value = '';

  while (
    position < body.length &&
    !Number.isNaN((code = body.charCodeAt(position))) &&
    code !== 0x000a &&
    code !== 0x000d
  ) {
    if (code === 34) {
      // Closing quote
      value += body.slice(chunkStart, position);
      return new Token(
        TokenKind.STRING,
        start,
        position + 1,
        line,
        col,
        prev,
        value,
      );
    }

    if (code < 0x0020 && code !== 0x0009) {
      throw syntaxError(
        source,
        position,
        `Invalid character within String: ${printCharCode(code)}.`,
      );
    }

    ++position;

    if (code === 92) {
      // Backslash: flush the literal run before it, then decode the escape.
      value += body.slice(chunkStart, position - 1);
      code = body.charCodeAt(position);

      switch (code) {
        case 34:
          value += '"';
          break;

        case 47:
          value += '/';
          break;

        case 92:
          value += '\\';
          break;

        case 98:
          value += '\b';
          break;

        case 102:
          value += '\f';
          break;

        case 110:
          value += '\n';
          break;

        case 114:
          value += '\r';
          break;

        case 116:
          value += '\t';
          break;

        case 117: {
          // uXXXX
          const charCode = uniCharCode(
            body.charCodeAt(position + 1),
            body.charCodeAt(position + 2),
            body.charCodeAt(position + 3),
            body.charCodeAt(position + 4),
          );

          if (charCode < 0) {
            const invalidSequence = body.slice(position + 1, position + 5);
            throw syntaxError(
              source,
              position,
              `Invalid character escape sequence: \\u${invalidSequence}.`,
            );
          }

          value += String.fromCharCode(charCode);
          position += 4;
          break;
        }

        default: {
          const escaped = String.fromCharCode(code);
          throw syntaxError(
            source,
            position,
            `Invalid character escape sequence: \\${escaped}.`,
          );
        }
      }

      ++position;
      chunkStart = position;
    }
  }

  throw syntaxError(source, position, 'Unterminated string.');
}
|
565 |
|
566 |
|
567 |
|
568 |
|
569 |
|
570 |
|
571 |
|
/**
 * Reads a triple-quoted block string token starting at `start`.
 *
 * Scans until the closing `"""`. Line terminators inside the string update
 * `lexer.line` and `lexer.lineStart`. The sequence `\"""` is replaced by a
 * literal `"""` in the raw value, which is then passed through
 * `dedentBlockStringValue`.
 *
 * @param source - Object whose `body` string is scanned.
 * @param {number} start - Index of the first of the three opening quotes.
 * @param line - Line value passed through to the Token.
 * @param col - Column value passed through to the Token.
 * @param prev - Previous token, passed through to the Token.
 * @param lexer - Lexer whose `line` / `lineStart` are mutated on line breaks.
 * @returns A TokenKind.BLOCK_STRING Token.
 * @throws The error built by `syntaxError` for a control character other
 *   than tab, line feed or carriage return, or for an unterminated string.
 */
function readBlockString(source, start, line, col, prev, lexer) {
  const body = source.body;
  let position = start + 3;
  // Start of the not-yet-copied run of raw characters.
  let chunkStart = position;
  let code = 0;
  let rawValue = '';

  while (
    position < body.length &&
    !Number.isNaN((code = body.charCodeAt(position)))
  ) {
    // Closing triple quote
    if (
      code === 34 &&
      body.charCodeAt(position + 1) === 34 &&
      body.charCodeAt(position + 2) === 34
    ) {
      rawValue += body.slice(chunkStart, position);
      return new Token(
        TokenKind.BLOCK_STRING,
        start,
        position + 3,
        line,
        col,
        prev,
        dedentBlockStringValue(rawValue),
      );
    }

    if (
      code < 0x0020 &&
      code !== 0x0009 &&
      code !== 0x000a &&
      code !== 0x000d
    ) {
      throw syntaxError(
        source,
        position,
        `Invalid character within String: ${printCharCode(code)}.`,
      );
    }

    if (code === 10) {
      // \n
      ++position;
      ++lexer.line;
      lexer.lineStart = position;
    } else if (code === 13) {
      // \r, optionally followed by \n (counted as one line break)
      position += body.charCodeAt(position + 1) === 10 ? 2 : 1;
      ++lexer.line;
      lexer.lineStart = position;
    } else if (
      // Escaped triple quote: \"""
      code === 92 &&
      body.charCodeAt(position + 1) === 34 &&
      body.charCodeAt(position + 2) === 34 &&
      body.charCodeAt(position + 3) === 34
    ) {
      rawValue += body.slice(chunkStart, position) + '"""';
      position += 4;
      chunkStart = position;
    } else {
      ++position;
    }
  }

  throw syntaxError(source, position, 'Unterminated string.');
}
|
618 |
|
619 |
|
620 |
|
621 |
|
622 |
|
623 |
|
624 |
|
625 |
|
626 |
|
627 |
|
628 |
|
629 |
|
/**
 * Combines four hexadecimal-digit code units into one 16-bit value.
 *
 * Each argument is converted with `char2hex`; the four nibbles are packed
 * most-significant first. Because `char2hex` yields -1 for a non-hex
 * character, any invalid argument makes the combined result negative.
 *
 * @param {number} a - Code unit of the most significant hex digit.
 * @param {number} b - Code unit of the second hex digit.
 * @param {number} c - Code unit of the third hex digit.
 * @param {number} d - Code unit of the least significant hex digit.
 * @returns {number} The combined value, or a negative number when any
 *   argument is not a hex digit.
 */
function uniCharCode(a, b, c, d) {
  const nibble3 = char2hex(a) << 12;
  const nibble2 = char2hex(b) << 8;
  const nibble1 = char2hex(c) << 4;
  const nibble0 = char2hex(d);
  return nibble3 | nibble2 | nibble1 | nibble0;
}
|
633 |
|
634 |
|
635 |
|
636 |
|
637 |
|
638 |
|
639 |
|
640 |
|
641 |
|
642 |
|
/**
 * Converts the code unit of a hexadecimal digit to its numeric value.
 *
 * @param {number} a - Code unit of "0"-"9", "A"-"F" or "a"-"f".
 * @returns {number} 0-15 for a hex digit, or -1 for anything else.
 */
function char2hex(a) {
  if (a >= 48 && a <= 57) {
    return a - 48; // 0-9
  }

  if (a >= 65 && a <= 70) {
    return a - 55; // A-F
  }

  if (a >= 97 && a <= 102) {
    return a - 87; // a-f
  }

  return -1;
}
|
649 |
|
650 |
|
651 |
|
652 |
|
653 |
|
654 |
|
655 |
|
/**
 * Reads a name token starting at `start`.
 *
 * The character at `start` is taken as already validated by the caller;
 * scanning continues over "_", 0-9, A-Z and a-z.
 *
 * @param source - Object whose `body` string is scanned.
 * @param {number} start - Index of the first character of the name.
 * @param line - Line value passed through to the Token.
 * @param col - Column value passed through to the Token.
 * @param prev - Previous token, passed through to the Token.
 * @returns A TokenKind.NAME Token whose value is the matched source text.
 */
function readName(source, start, line, col, prev) {
  const body = source.body;
  const bodyLength = body.length;
  let position = start + 1;

  while (position !== bodyLength) {
    const code = body.charCodeAt(position);
    // NaN fails every comparison below, so it also ends the name.
    const isNameContinue =
      code === 95 || // _
      (code >= 48 && code <= 57) || // 0-9
      (code >= 65 && code <= 90) || // A-Z
      (code >= 97 && code <= 122); // a-z

    if (!isNameContinue) {
      break;
    }

    ++position;
  }

  return new Token(
    TokenKind.NAME,
    start,
    position,
    line,
    col,
    prev,
    body.slice(start, position),
  );
}
|
672 |
|
673 |
|
/**
 * Reports whether a code unit may begin a name: "_", A-Z or a-z.
 *
 * @param {number} code - UTF-16 code unit to classify.
 * @returns {boolean} true when `code` is an underscore or an ASCII letter.
 */
function isNameStart(code) {
  if (code === 95) {
    return true; // _
  }

  const isUpperCase = code >= 65 && code <= 90;
  const isLowerCase = code >= 97 && code <= 122;
  return isUpperCase || isLowerCase;
}
|