1 | import defineToJSON from '../jsutils/defineToJSON';
|
2 | import { syntaxError } from '../error/syntaxError';
|
3 | import { dedentBlockStringValue } from './blockString';
|
4 | import { TokenKind } from './tokenKind';
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
/**
 * Builds a lexer object for the given source.
 *
 * The returned object starts with both `token` and `lastToken` pointing at a
 * start-of-file token (TokenKind.SOF), with `line` set to 1 and `lineStart`
 * set to 0. Its `advance` method moves to the next token and its `lookahead`
 * method peeks at it without moving.
 *
 * @param source  Object whose `body` property holds the text to lex.
 * @param options Stored on the lexer as `options`; not read in this function.
 * @returns The lexer object.
 */
export function createLexer(source, options) {
  // Sentinel token that anchors the linked list of tokens.
  var sofToken = new Tok(TokenKind.SOF, 0, 0, 0, 0, null);

  return {
    source: source,
    options: options,
    lastToken: sofToken,
    token: sofToken,
    line: 1,
    lineStart: 0,
    advance: advanceLexer,
    lookahead: lookahead
  };
}
|
28 |
|
/**
 * Lexer method: moves the lexer forward by one token.
 *
 * The current token is remembered in `lastToken`, then `token` is replaced by
 * whatever `this.lookahead()` returns. Must be called with the lexer as `this`.
 *
 * @returns The new current token.
 */
function advanceLexer() {
  this.lastToken = this.token;

  var upcoming = this.lookahead();
  this.token = upcoming;

  return upcoming;
}
|
34 |
|
/**
 * Lexer method: returns the next non-comment token without changing
 * `this.token`.
 *
 * Tokens are read on demand and memoised on each token's `next` property, so
 * repeated calls do not lex the same text twice. If the current token is
 * already EOF it is returned as-is. Must be called with the lexer as `this`.
 *
 * @returns The next token whose kind is not TokenKind.COMMENT.
 */
function lookahead() {
  var cursor = this.token;

  if (cursor.kind === TokenKind.EOF) {
    return cursor;
  }

  do {
    // Only lex when this link of the token chain has not been produced yet.
    if (!cursor.next) {
      cursor.next = readToken(this, cursor);
    }

    cursor = cursor.next;
  } while (cursor.kind === TokenKind.COMMENT);

  return cursor;
}
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
/**
 * Tells whether a token is one of the punctuator kinds.
 *
 * @param token Object with a `kind` property.
 * @returns `true` when `token.kind` is strictly equal to one of the
 *          punctuator members of TokenKind listed below, otherwise `false`.
 */
export function isPunctuatorToken(token) {
  switch (token.kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;

    default:
      return false;
  }
}
|
57 |
|
58 |
|
59 |
|
60 |
|
/**
 * Token constructor (call with `new`).
 *
 * @param kind   Token kind.
 * @param start  Offset in the source body where the token begins.
 * @param end    Offset in the source body just past the token.
 * @param line   Line number recorded for the token.
 * @param column Column number recorded for the token.
 * @param prev   Previous token in the chain, or null.
 * @param value  Optional token text; left `undefined` when omitted.
 */
function Tok(kind, start, end, line, column, prev, value) {
  var token = this;

  // What was lexed and where.
  token.kind = kind;
  token.start = start;
  token.end = end;
  token.line = line;
  token.column = column;
  token.value = value;

  // Links of the token chain; `next` is filled in later by the lexer.
  token.prev = prev;
  token.next = null;
}
|
71 |
|
72 |
|
// Registers the JSON representation of a token through the imported
// defineToJSON helper. The representation keeps only the kind, value, line
// and column of the token; offsets and chain links are left out.
defineToJSON(Tok, function () {
  var token = this;
  var summary = {};

  summary.kind = token.kind;
  summary.value = token.value;
  summary.line = token.line;
  summary.column = token.column;

  return summary;
});
|
81 |
|
/**
 * Renders a character code for use in an error message.
 *
 * @param code A UTF-16 code unit, or NaN when reading past the end of input.
 * @returns TokenKind.EOF for NaN; the JSON-quoted character for codes below
 *          0x007F; otherwise a quoted `\uXXXX` escape with upper-case hex
 *          digits.
 */
function printCharCode(code) {
  // charCodeAt yields NaN past the end of the input.
  if (isNaN(code)) {
    return TokenKind.EOF;
  }

  if (code < 0x007f) {
    return JSON.stringify(String.fromCharCode(code));
  }

  // Left-pad so that two- and three-digit hex values still print four digits.
  var hexDigits = ('00' + code.toString(16).toUpperCase()).slice(-4);
  return '"\\u' + hexDigits + '"';
}
|
89 |
|
90 |
|
91 |
|
92 |
|
93 |
|
94 |
|
95 |
|
96 |
|
97 |
|
/**
 * Reads the token that follows `prev` in the lexer's source.
 *
 * Ignored characters are skipped first, then the first remaining character
 * decides which kind of token is produced. Punctuators are built here; names,
 * numbers, comments and strings are delegated to their reader functions.
 *
 * @param lexer Lexer object; its `line` and `lineStart` may be updated while
 *              ignored characters are skipped.
 * @param prev  Previously read token; lexing resumes at `prev.end`.
 * @returns The token that was read (an EOF token at the end of the body).
 * @throws The error built by syntaxError when the character cannot start a
 *         token.
 */
function readToken(lexer, prev) {
  var source = lexer.source;
  var body = source.body;
  var bodyLength = body.length;

  // Skipping may move lexer.line / lexer.lineStart, so read them afterwards.
  var pos = positionAfterWhitespace(body, prev.end, lexer);
  var line = lexer.line;
  var col = 1 + pos - lexer.lineStart;

  if (pos >= bodyLength) {
    return new Tok(TokenKind.EOF, bodyLength, bodyLength, line, col, prev);
  }

  var code = body.charCodeAt(pos);
  var isLetter = code >= 65 && code <= 90 || code >= 97 && code <= 122;
  var isDigit = code >= 48 && code <= 57;

  // A-Z, a-z or "_" starts a name.
  if (isLetter || code === 95) {
    return readName(source, pos, line, col, prev);
  }

  // "-" or 0-9 starts a number.
  if (isDigit || code === 45) {
    return readNumber(source, pos, code, line, col, prev);
  }

  switch (code) {
    case 33: // !
      return new Tok(TokenKind.BANG, pos, pos + 1, line, col, prev);

    case 34: // " (three in a row open a block string)
      if (body.charCodeAt(pos + 1) === 34 && body.charCodeAt(pos + 2) === 34) {
        return readBlockString(source, pos, line, col, prev, lexer);
      }

      return readString(source, pos, line, col, prev);

    case 35: // #
      return readComment(source, pos, line, col, prev);

    case 36: // $
      return new Tok(TokenKind.DOLLAR, pos, pos + 1, line, col, prev);

    case 38: // &
      return new Tok(TokenKind.AMP, pos, pos + 1, line, col, prev);

    case 40: // (
      return new Tok(TokenKind.PAREN_L, pos, pos + 1, line, col, prev);

    case 41: // )
      return new Tok(TokenKind.PAREN_R, pos, pos + 1, line, col, prev);

    case 46: // . (only valid as "...")
      if (body.charCodeAt(pos + 1) === 46 && body.charCodeAt(pos + 2) === 46) {
        return new Tok(TokenKind.SPREAD, pos, pos + 3, line, col, prev);
      }

      break;

    case 58: // :
      return new Tok(TokenKind.COLON, pos, pos + 1, line, col, prev);

    case 61: // =
      return new Tok(TokenKind.EQUALS, pos, pos + 1, line, col, prev);

    case 64: // @
      return new Tok(TokenKind.AT, pos, pos + 1, line, col, prev);

    case 91: // [
      return new Tok(TokenKind.BRACKET_L, pos, pos + 1, line, col, prev);

    case 93: // ]
      return new Tok(TokenKind.BRACKET_R, pos, pos + 1, line, col, prev);

    case 123: // {
      return new Tok(TokenKind.BRACE_L, pos, pos + 1, line, col, prev);

    case 124: // |
      return new Tok(TokenKind.PIPE, pos, pos + 1, line, col, prev);

    case 125: // }
      return new Tok(TokenKind.BRACE_R, pos, pos + 1, line, col, prev);
  }

  throw syntaxError(source, pos, unexpectedCharacterMessage(code));
}
|
258 |
|
259 |
|
260 |
|
261 |
|
262 |
|
/**
 * Builds the error description for a character that cannot start a token.
 *
 * @param code Character code of the offending character.
 * @returns A dedicated hint for a single quote; "Cannot contain the invalid
 *          character" for control characters other than tab, line feed and
 *          carriage return; "Cannot parse the unexpected character" otherwise.
 */
function unexpectedCharacterMessage(code) {
  // A single quote is most likely a mistyped string delimiter.
  if (code === 39) {
    return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
  }

  var isForbiddenControl = code < 0x0020 && code !== 0x0009 && code !== 0x000a && code !== 0x000d;
  var lead = isForbiddenControl ? "Cannot contain the invalid character " : "Cannot parse the unexpected character ";

  return lead.concat(printCharCode(code), ".");
}
|
275 |
|
276 |
|
277 |
|
278 |
|
279 |
|
280 |
|
/**
 * Finds the offset of the first character at or after `startPosition` that
 * is not ignored between tokens.
 *
 * Tabs, spaces, commas and the byte order mark (U+FEFF) are skipped. Line
 * feeds, carriage returns and CR+LF pairs are skipped too, and each one
 * increments `lexer.line` and sets `lexer.lineStart` to the offset that
 * follows it.
 *
 * @param body          Text being lexed.
 * @param startPosition Offset at which to start scanning.
 * @param lexer         Object whose `line` and `lineStart` are updated.
 * @returns The offset of the first significant character, or the offset where
 *          scanning stopped at the end of the body.
 */
function positionAfterWhitespace(body, startPosition, lexer) {
  var limit = body.length;
  var cursor = startPosition;

  while (cursor < limit) {
    switch (body.charCodeAt(cursor)) {
      case 9: // tab
      case 32: // space
      case 44: // comma
      case 0xfeff: // byte order mark
        cursor++;
        break;

      case 10: // line feed
        cursor++;
        lexer.line++;
        lexer.lineStart = cursor;
        break;

      case 13: // carriage return, optionally followed by a line feed
        cursor += body.charCodeAt(cursor + 1) === 10 ? 2 : 1;
        lexer.line++;
        lexer.lineStart = cursor;
        break;

      default:
        return cursor;
    }
  }

  return cursor;
}
|
312 |
|
313 |
|
314 |
|
315 |
|
316 |
|
317 |
|
318 |
|
/**
 * Reads a comment token that begins at `start` (the "#" character).
 *
 * The comment runs until the end of input or until the first control
 * character other than tab, which in practice means up to the line end.
 *
 * @param source Object whose `body` holds the text being lexed.
 * @param start  Offset of the "#".
 * @param line   Line number to record on the token.
 * @param col    Column number to record on the token.
 * @param prev   Previous token.
 * @returns A COMMENT token whose value is the text after the "#".
 */
function readComment(source, start, line, col, prev) {
  var body = source.body;
  var cursor = start + 1;
  var code = body.charCodeAt(cursor);

  // NaN marks the end of input; anything at or below 0x1F except tab ends the comment.
  while (!isNaN(code) && (code > 0x001f || code === 0x0009)) {
    cursor++;
    code = body.charCodeAt(cursor);
  }

  var text = body.slice(start + 1, cursor);
  return new Tok(TokenKind.COMMENT, start, cursor, line, col, prev, text);
}
|
331 |
|
332 |
|
333 |
|
334 |
|
335 |
|
336 |
|
337 |
|
338 |
|
339 |
|
/**
 * Reads an integer or floating point number token starting at `start`.
 *
 * Accepted shape: an optional "-", then either a single "0" or a run of
 * digits, then an optional "." fraction and an optional "e"/"E" exponent
 * with an optional sign. A fraction or an exponent makes the token a FLOAT.
 *
 * @param source    Object whose `body` holds the text being lexed.
 * @param start     Offset of the first character of the number.
 * @param firstCode Character code found at `start`.
 * @param line      Line number to record on the token.
 * @param col       Column number to record on the token.
 * @param prev      Previous token.
 * @returns An INT or FLOAT token whose value is the matched text.
 * @throws The error built by syntaxError when a digit follows a leading zero,
 *         when a digit is missing, or when the number is directly followed by
 *         ".", "e" or "E".
 */
function readNumber(source, start, firstCode, line, col, prev) {
  var body = source.body;
  var cursor = start;
  var code = firstCode;
  var hasFraction = false;
  var hasExponent = false;

  // Optional leading minus sign.
  if (code === 45) {
    cursor++;
    code = body.charCodeAt(cursor);
  }

  // Integer part: a lone zero, or one or more digits.
  if (code !== 48) {
    cursor = readDigits(source, cursor, code);
    code = body.charCodeAt(cursor);
  } else {
    cursor++;
    code = body.charCodeAt(cursor);

    if (code >= 48 && code <= 57) {
      throw syntaxError(source, cursor, "Invalid number, unexpected digit after 0: ".concat(printCharCode(code), "."));
    }
  }

  // Fractional part: "." followed by digits.
  if (code === 46) {
    hasFraction = true;
    cursor++;
    code = body.charCodeAt(cursor);
    cursor = readDigits(source, cursor, code);
    code = body.charCodeAt(cursor);
  }

  // Exponent part: "e" or "E", optional sign, digits.
  if (code === 69 || code === 101) {
    hasExponent = true;
    cursor++;
    code = body.charCodeAt(cursor);

    if (code === 43 || code === 45) {
      cursor++;
      code = body.charCodeAt(cursor);
    }

    cursor = readDigits(source, cursor, code);
    code = body.charCodeAt(cursor);
  }

  // A number may not run straight into another ".", "e" or "E".
  if (code === 46 || code === 69 || code === 101) {
    throw syntaxError(source, cursor, "Invalid number, expected digit but got: ".concat(printCharCode(code), "."));
  }

  var kind = hasFraction || hasExponent ? TokenKind.FLOAT : TokenKind.INT;
  return new Tok(kind, start, cursor, line, col, prev, body.slice(start, cursor));
}
|
392 |
|
393 |
|
394 |
|
395 |
|
396 |
|
/**
 * Consumes a run of one or more decimal digits.
 *
 * @param source    Object whose `body` holds the text being lexed.
 * @param start     Offset of the first expected digit.
 * @param firstCode Character code found at `start`.
 * @returns The offset of the first character after the digits.
 * @throws The error built by syntaxError when `firstCode` is not a digit.
 */
function readDigits(source, start, firstCode) {
  var startsWithDigit = firstCode >= 48 && firstCode <= 57;

  if (!startsWithDigit) {
    throw syntaxError(source, start, "Invalid number, expected digit but got: ".concat(printCharCode(firstCode), "."));
  }

  var body = source.body;
  var cursor = start;
  var code;

  // The character at `start` is known to be a digit; keep going while more follow.
  do {
    cursor++;
    code = body.charCodeAt(cursor);
  } while (code >= 48 && code <= 57);

  return cursor;
}
|
414 |
|
415 |
|
416 |
|
417 |
|
418 |
|
419 |
|
420 |
|
/**
 * Reads a single-line string token that begins at `start` (the opening
 * double quote).
 *
 * Backslash escapes are decoded into the token value: the quote, slash and
 * backslash characters, the b/f/n/r/t control escapes, and four-digit hex
 * "u" escapes.
 *
 * @param source Object whose `body` holds the text being lexed.
 * @param start  Offset of the opening quote.
 * @param line   Line number to record on the token.
 * @param col    Column number to record on the token.
 * @param prev   Previous token.
 * @returns A STRING token whose value is the decoded content.
 * @throws The error built by syntaxError for a control character other than
 *         tab, for an unknown or malformed escape, and when a line break or
 *         the end of input is reached before the closing quote.
 */
function readString(source, start, line, col, prev) {
  var body = source.body;
  var bodyLength = body.length;
  var cursor = start + 1;
  var chunkStart = cursor;
  var value = '';

  // Escapes that map one character onto one character, keyed by char code.
  var simpleEscapes = {
    34: '"',
    47: '/',
    92: '\\',
    98: '\b',
    102: '\f',
    110: '\n',
    114: '\r',
    116: '\t'
  };

  while (cursor < bodyLength) {
    var code = body.charCodeAt(cursor);

    // A line break (or running out of input) means the string was never closed.
    if (isNaN(code) || code === 0x000a || code === 0x000d) {
      break;
    }

    // Closing quote: flush the pending chunk and finish.
    if (code === 34) {
      value += body.slice(chunkStart, cursor);
      return new Tok(TokenKind.STRING, start, cursor + 1, line, col, prev, value);
    }

    if (code < 0x0020 && code !== 0x0009) {
      throw syntaxError(source, cursor, "Invalid character within String: ".concat(printCharCode(code), "."));
    }

    cursor++;

    if (code !== 92) {
      continue;
    }

    // Backslash: flush the text before it, then decode the escape.
    value += body.slice(chunkStart, cursor - 1);
    code = body.charCodeAt(cursor);

    var replacement = simpleEscapes[code];

    if (typeof replacement === 'string') {
      value += replacement;
    } else if (code === 117) {
      // "u" followed by four hex digits.
      var charCode = uniCharCode(body.charCodeAt(cursor + 1), body.charCodeAt(cursor + 2), body.charCodeAt(cursor + 3), body.charCodeAt(cursor + 4));

      if (charCode < 0) {
        var invalidSequence = body.slice(cursor + 1, cursor + 5);
        throw syntaxError(source, cursor, "Invalid character escape sequence: \\u".concat(invalidSequence, "."));
      }

      value += String.fromCharCode(charCode);
      cursor += 4;
    } else {
      throw syntaxError(source, cursor, "Invalid character escape sequence: \\".concat(String.fromCharCode(code), "."));
    }

    cursor++;
    chunkStart = cursor;
  }

  throw syntaxError(source, cursor, 'Unterminated string.');
}
|
507 |
|
508 |
|
509 |
|
510 |
|
511 |
|
512 |
|
513 |
|
/**
 * Reads a block string token that begins at `start` (the opening triple
 * double quote).
 *
 * Line breaks inside the block string are allowed; each one increments
 * `lexer.line` and sets `lexer.lineStart` to the offset that follows it. A
 * backslash directly followed by a triple quote is replaced by a plain triple
 * quote; no other escape is interpreted. The collected raw text is passed
 * through dedentBlockStringValue to produce the token value.
 *
 * @param source Object whose `body` holds the text being lexed.
 * @param start  Offset of the opening triple quote.
 * @param line   Line number to record on the token.
 * @param col    Column number to record on the token.
 * @param prev   Previous token.
 * @param lexer  Object whose `line` and `lineStart` are updated.
 * @returns A BLOCK_STRING token.
 * @throws The error built by syntaxError for a control character other than
 *         tab, line feed or carriage return, and when the input ends before
 *         the closing triple quote.
 */
function readBlockString(source, start, line, col, prev, lexer) {
  var body = source.body;
  var bodyLength = body.length;
  var cursor = start + 3;
  var chunkStart = cursor;
  var rawValue = '';

  // True when three consecutive double quotes begin at `index`.
  function tripleQuoteAt(index) {
    return body.charCodeAt(index) === 34 && body.charCodeAt(index + 1) === 34 && body.charCodeAt(index + 2) === 34;
  }

  while (cursor < bodyLength) {
    var code = body.charCodeAt(cursor);

    if (isNaN(code)) {
      break;
    }

    // Closing delimiter: flush the pending chunk and finish.
    if (tripleQuoteAt(cursor)) {
      rawValue += body.slice(chunkStart, cursor);
      return new Tok(TokenKind.BLOCK_STRING, start, cursor + 3, line, col, prev, dedentBlockStringValue(rawValue));
    }

    switch (code) {
      case 10: // line feed
        cursor++;
        lexer.line++;
        lexer.lineStart = cursor;
        break;

      case 13: // carriage return, optionally followed by a line feed
        cursor += body.charCodeAt(cursor + 1) === 10 ? 2 : 1;
        lexer.line++;
        lexer.lineStart = cursor;
        break;

      case 92: // backslash: only an escaped triple quote is special
        if (tripleQuoteAt(cursor + 1)) {
          rawValue += body.slice(chunkStart, cursor) + '"""';
          cursor += 4;
          chunkStart = cursor;
        } else {
          cursor++;
        }

        break;

      default:
        // Line feed and carriage return were handled above, so only tab is exempt here.
        if (code < 0x0020 && code !== 0x0009) {
          throw syntaxError(source, cursor, "Invalid character within String: ".concat(printCharCode(code), "."));
        }

        cursor++;
    }
  }

  throw syntaxError(source, cursor, 'Unterminated string.');
}
|
560 |
|
561 |
|
562 |
|
563 |
|
564 |
|
565 |
|
566 |
|
567 |
|
568 |
|
569 |
|
570 |
|
571 |
|
/**
 * Combines the character codes of four hexadecimal digits into the 16-bit
 * value they spell, most significant digit first.
 *
 * @param a Character code of the first (most significant) hex digit.
 * @param b Character code of the second hex digit.
 * @param c Character code of the third hex digit.
 * @param d Character code of the last (least significant) hex digit.
 * @returns The combined value, or a negative number when any of the four
 *          codes is not a hex digit (char2hex yields -1, whose sign bit
 *          survives the shift and the bitwise OR).
 */
function uniCharCode(a, b, c, d) {
  var nibble3 = char2hex(a) << 12;
  var nibble2 = char2hex(b) << 8;
  var nibble1 = char2hex(c) << 4;
  var nibble0 = char2hex(d);

  return nibble3 | nibble2 | nibble1 | nibble0;
}
|
575 |
|
576 |
|
577 |
|
578 |
|
579 |
|
580 |
|
581 |
|
582 |
|
583 |
|
584 |
|
/**
 * Converts the character code of one hexadecimal digit to its numeric value.
 *
 * @param a Character code to convert.
 * @returns 0-9 for "0"-"9", 10-15 for "A"-"F" or "a"-"f", and -1 for any
 *          other input (including NaN).
 */
function char2hex(a) {
  // "0".."9"
  if (a >= 48 && a <= 57) {
    return a - 48;
  }

  // "A".."F"
  if (a >= 65 && a <= 70) {
    return a - 55;
  }

  // "a".."f"
  if (a >= 97 && a <= 102) {
    return a - 87;
  }

  return -1;
}
|
591 |
|
592 |
|
593 |
|
594 |
|
595 |
|
596 |
|
597 |
|
/**
 * Reads a name token that begins at `start`.
 *
 * The character at `start` is taken as part of the name without being
 * checked here; the name then extends over every following "_", digit or
 * ASCII letter.
 *
 * @param source Object whose `body` holds the text being lexed.
 * @param start  Offset of the first character of the name.
 * @param line   Line number to record on the token.
 * @param col    Column number to record on the token.
 * @param prev   Previous token.
 * @returns A NAME token whose value is the matched text.
 */
function readName(source, start, line, col, prev) {
  var body = source.body;
  var limit = body.length;
  var cursor = start + 1;

  for (; cursor !== limit; cursor++) {
    var code = body.charCodeAt(cursor);

    // NaN (past the end of input) fails every comparison and ends the name.
    var isNameChar = code === 95 || code >= 48 && code <= 57 || code >= 65 && code <= 90 || code >= 97 && code <= 122;

    if (!isNameChar) {
      break;
    }
  }

  return new Tok(TokenKind.NAME, start, cursor, line, col, prev, body.slice(start, cursor));
}
|