UNPKG

16.5 kBJavaScriptView Raw
1"use strict";
2
3Object.defineProperty(exports, "__esModule", {
4 value: true
5});
6exports.createLexer = createLexer;
7exports.isPunctuatorToken = isPunctuatorToken;
8
9var _defineToJSON = _interopRequireDefault(require("../jsutils/defineToJSON"));
10
11var _syntaxError = require("../error/syntaxError");
12
13var _blockString = require("./blockString");
14
15var _tokenKind = require("./tokenKind");
16
/**
 * Normalises the result of a `require` call so that callers can always read
 * the default export from `.default`.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`;
 *   otherwise a new wrapper object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return {
    default: obj
  };
}
18
/**
 * Creates a Lexer for the given Source object.
 *
 * The lexer is stateful: each call to `advance` moves it to the next
 * non-comment token of the source. It starts positioned on a synthetic
 * start-of-file (SOF) token at line 1. Once an EOF token has been reached,
 * further calls keep returning that same EOF token.
 *
 * @param {Object} source - The Source to lex; its `body` string is read.
 * @param {*} options - Stored on the lexer as `options`; not read here.
 * @returns {Object} The lexer, exposing `source`, `options`, `lastToken`,
 *   `token`, `line`, `lineStart`, `advance` and `lookahead`.
 */
function createLexer(source, options) {
  // Both `token` and `lastToken` begin on the same SOF sentinel.
  var sofToken = new Tok(_tokenKind.TokenKind.SOF, 0, 0, 0, 0, null);
  return {
    source: source,
    options: options,
    lastToken: sofToken,
    token: sofToken,
    line: 1,
    lineStart: 0,
    advance: advanceLexer,
    lookahead: lookahead
  };
}
41
/**
 * Moves the lexer (bound as `this`) forward by one token.
 *
 * The current token is remembered as `lastToken`, then the result of
 * `this.lookahead()` becomes the new current token.
 *
 * @returns {Object} The new current token.
 */
function advanceLexer() {
  // Record the outgoing token before asking for the next one.
  this.lastToken = this.token;
  var upcoming = this.lookahead();
  this.token = upcoming;
  return upcoming;
}
47
/**
 * Returns the next non-comment token after the lexer's current token
 * without changing `this.token`.
 *
 * Tokens are read lazily: a token's `next` link is filled in by `readToken`
 * the first time it is needed and reused afterwards. When the current token
 * is already EOF, that token is returned unchanged.
 *
 * @returns {Object} The next non-comment token, or the current EOF token.
 */
function lookahead() {
  var current = this.token;

  if (current.kind === _tokenKind.TokenKind.EOF) {
    return current;
  }

  do {
    // Note: next is only mutable during parsing.
    if (!current.next) {
      current.next = readToken(this, current);
    }

    current = current.next;
  } while (current.kind === _tokenKind.TokenKind.COMMENT);

  return current;
}
60/**
61 * The return type of createLexer.
62 */
63
64
// @internal

/**
 * Reports whether a token's kind is one of the punctuator kinds:
 * BANG, DOLLAR, AMP, PAREN_L, PAREN_R, SPREAD, COLON, EQUALS, AT,
 * BRACKET_L, BRACKET_R, BRACE_L, PIPE or BRACE_R.
 *
 * @param {Object} token - Any object with a `kind` property.
 * @returns {boolean} True when `token.kind` strictly equals one of the
 *   kinds listed above.
 */
function isPunctuatorToken(token) {
  var TokenKind = _tokenKind.TokenKind;

  switch (token.kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;

    default:
      return false;
  }
}
/**
 * Constructor for Token objects; must be invoked with `new`.
 *
 * @param {*} kind - The token kind.
 * @param {number} start - Offset in the source body where the token begins.
 * @param {number} end - Offset in the source body where the token ends.
 * @param {number} line - Line number recorded for the token.
 * @param {number} column - Column number recorded for the token.
 * @param {Object|null} prev - The previously lexed token, or null.
 * @param {string} [value] - Text payload; left `undefined` when omitted.
 */
function Tok(kind, start, end, line, column, prev, value) {
  // What the token is and where it sits in the source.
  this.kind = kind;
  this.start = start;
  this.end = end;
  this.line = line;
  this.column = column;
  // Payload text.
  this.value = value;
  // Links to neighbouring tokens; `next` is filled in later by the lexer.
  this.prev = prev;
  this.next = null;
}

// Print a simplified form when appearing in JSON/util.inspect.
85
86
(0, _defineToJSON.default)(Tok, function () {
  // Expose only kind, value, line and column; offsets and the prev/next
  // links are left out of the simplified form.
  var summary = {};
  summary.kind = this.kind;
  summary.value = this.value;
  summary.line = this.line;
  summary.column = this.column;
  return summary;
});
95
/**
 * Renders a character code as a quoted string for use in error messages.
 *
 * @param {number} code - A UTF-16 code unit, or NaN/undefined.
 * @returns {string} The EOF token kind for NaN/undefined; the
 *   JSON-stringified character for codes below 0x7F; otherwise a quoted,
 *   upper-case, four-digit `\uXXXX` escape.
 */
function printCharCode(code) {
  // NaN/undefined represents access beyond the end of the file.
  if (isNaN(code)) {
    return _tokenKind.TokenKind.EOF;
  }

  // Trust JSON for ASCII.
  if (code < 0x007f) {
    return JSON.stringify(String.fromCharCode(code));
  }

  // Otherwise print the escaped form, left-padded to four hex digits.
  var hex = code.toString(16).toUpperCase();
  return '"\\u' + ('00' + hex).slice(-4) + '"';
}
/**
 * Reads the token that follows `prev` in the lexer's source.
 *
 * Whitespace after `prev.end` is skipped first (which may advance the
 * lexer's line bookkeeping). Punctuators are then lexed directly, while
 * comments, names, numbers and strings are delegated to their dedicated
 * reader functions.
 *
 * @param {Object} lexer - The lexer; `source`, `line` and `lineStart` are used.
 * @param {Object} prev - The previously lexed token; lexing resumes at `prev.end`.
 * @returns {Object} The next token, of kind EOF when the body is exhausted.
 * @throws A syntax error when the next character cannot start any token.
 */
function readToken(lexer, prev) {
  var TokenKind = _tokenKind.TokenKind;
  var source = lexer.source;
  var body = source.body;
  var bodyLength = body.length;
  // Skipping whitespace updates lexer.line / lexer.lineStart, so those two
  // values must only be read after this call.
  var pos = positionAfterWhitespace(body, prev.end, lexer);
  var line = lexer.line;
  var col = 1 + pos - lexer.lineStart;

  if (pos >= bodyLength) {
    return new Tok(TokenKind.EOF, bodyLength, bodyLength, line, col, prev);
  }

  var code = body.charCodeAt(pos); // SourceCharacter

  // A-Z _ a-z
  if (code >= 65 && code <= 90 || code === 95 || code >= 97 && code <= 122) {
    return readName(source, pos, line, col, prev);
  }

  // - 0-9
  if (code === 45 || code >= 48 && code <= 57) {
    return readNumber(source, pos, code, line, col, prev);
  }

  switch (code) {
    case 33: // !
      return new Tok(TokenKind.BANG, pos, pos + 1, line, col, prev);

    case 34: // "
      // Three consecutive quotes open a block string.
      if (body.charCodeAt(pos + 1) === 34 && body.charCodeAt(pos + 2) === 34) {
        return readBlockString(source, pos, line, col, prev, lexer);
      }

      return readString(source, pos, line, col, prev);

    case 35: // #
      return readComment(source, pos, line, col, prev);

    case 36: // $
      return new Tok(TokenKind.DOLLAR, pos, pos + 1, line, col, prev);

    case 38: // &
      return new Tok(TokenKind.AMP, pos, pos + 1, line, col, prev);

    case 40: // (
      return new Tok(TokenKind.PAREN_L, pos, pos + 1, line, col, prev);

    case 41: // )
      return new Tok(TokenKind.PAREN_R, pos, pos + 1, line, col, prev);

    case 46: // .
      // Only the three-dot spread is a token; a lone dot falls through to
      // the unexpected-character error below.
      if (body.charCodeAt(pos + 1) === 46 && body.charCodeAt(pos + 2) === 46) {
        return new Tok(TokenKind.SPREAD, pos, pos + 3, line, col, prev);
      }

      break;

    case 58: // :
      return new Tok(TokenKind.COLON, pos, pos + 1, line, col, prev);

    case 61: // =
      return new Tok(TokenKind.EQUALS, pos, pos + 1, line, col, prev);

    case 64: // @
      return new Tok(TokenKind.AT, pos, pos + 1, line, col, prev);

    case 91: // [
      return new Tok(TokenKind.BRACKET_L, pos, pos + 1, line, col, prev);

    case 93: // ]
      return new Tok(TokenKind.BRACKET_R, pos, pos + 1, line, col, prev);

    case 123: // {
      return new Tok(TokenKind.BRACE_L, pos, pos + 1, line, col, prev);

    case 124: // |
      return new Tok(TokenKind.PIPE, pos, pos + 1, line, col, prev);

    case 125: // }
      return new Tok(TokenKind.BRACE_R, pos, pos + 1, line, col, prev);
  }

  throw (0, _syntaxError.syntaxError)(source, pos, unexpectedCharacterMessage(code));
}
/**
 * Builds the error message for a character that cannot start any token.
 *
 * @param {number} code - The offending character code.
 * @returns {string} A dedicated hint for a single quote; an "invalid
 *   character" message for control characters other than tab, line feed and
 *   carriage return; an "unexpected character" message for everything else.
 */
function unexpectedCharacterMessage(code) {
  // '
  if (code === 39) {
    return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
  }

  var isPermittedControl = code === 0x0009 || code === 0x000a || code === 0x000d;
  var prefix = code < 0x0020 && !isPermittedControl ? "Cannot contain the invalid character " : "Cannot parse the unexpected character ";
  return prefix + printCharCode(code) + ".";
}
/**
 * Scans forward from `startPosition` past ignorable characters and returns
 * the position of the first character that is not one of them (or the
 * position reached when the body runs out).
 *
 * Tab, space, comma and BOM are skipped. Line feed, carriage return and the
 * CR LF pair are skipped too, and each one increments `lexer.line` and sets
 * `lexer.lineStart` to the position just after it.
 *
 * @param {string} body - The source text.
 * @param {number} startPosition - Offset at which to begin scanning.
 * @param {Object} lexer - Its `line` and `lineStart` fields are updated.
 * @returns {number} Offset of the next lexable character.
 */
function positionAfterWhitespace(body, startPosition, lexer) {
  var end = body.length;
  var cursor = startPosition;

  while (cursor < end) {
    switch (body.charCodeAt(cursor)) {
      case 9: // tab
      case 32: // space
      case 44: // comma
      case 0xfeff: // BOM
        ++cursor;
        break;

      case 10: // new line
        ++cursor;
        ++lexer.line;
        lexer.lineStart = cursor;
        break;

      case 13: // carriage return
        // CR LF counts as a single line break.
        cursor += body.charCodeAt(cursor + 1) === 10 ? 2 : 1;
        ++lexer.line;
        lexer.lineStart = cursor;
        break;

      default:
        return cursor;
    }
  }

  return cursor;
}
/**
 * Lexes a comment starting at the `#` found at `start`.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * The comment extends up to, but not including, the first character that is
 * neither a tab nor above 0x001F, or to the end of the body. The token's
 * value is the comment text without the leading `#`.
 *
 * @returns {Object} A COMMENT token spanning `start` to the comment's end.
 */
function readComment(source, start, line, col, prev) {
  var body = source.body;
  var end = start + 1;
  var ch = body.charCodeAt(end);

  // SourceCharacter but not LineTerminator; NaN marks the end of the body.
  while (!isNaN(ch) && (ch > 0x001f || ch === 0x0009)) {
    ch = body.charCodeAt(++end);
  }

  return new Tok(_tokenKind.TokenKind.COMMENT, start, end, line, col, prev, body.slice(start + 1, end));
}
/**
 * Lexes a numeric literal beginning at `start`. The result is a FLOAT when
 * a fractional part or an exponent is present, and an INT otherwise.
 *
 * Int:   -?(0|[1-9][0-9]*)
 * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
 *
 * @param {Object} source - The Source whose `body` is being lexed.
 * @param {number} start - Offset of the first character of the number.
 * @param {number} firstCode - Character code found at `start`.
 * @returns {Object} An INT or FLOAT token whose value is the literal text.
 * @throws A syntax error for a digit directly after a leading 0, for a
 *   missing digit, or when the number is directly followed by `.`, `E` or `e`.
 */
function readNumber(source, start, firstCode, line, col, prev) {
  var text = source.body;
  var cursor = start;
  var ch = firstCode;
  var sawFractionOrExponent = false;

  // -
  if (ch === 45) {
    cursor += 1;
    ch = text.charCodeAt(cursor);
  }

  if (ch !== 48) {
    cursor = readDigits(source, cursor, ch);
    ch = text.charCodeAt(cursor);
  } else {
    // 0 - a leading zero must not be followed by another digit.
    cursor += 1;
    ch = text.charCodeAt(cursor);

    if (ch >= 48 && ch <= 57) {
      throw (0, _syntaxError.syntaxError)(source, cursor, "Invalid number, unexpected digit after 0: " + printCharCode(ch) + ".");
    }
  }

  // .
  if (ch === 46) {
    sawFractionOrExponent = true;
    cursor += 1;
    cursor = readDigits(source, cursor, text.charCodeAt(cursor));
    ch = text.charCodeAt(cursor);
  }

  // E e
  if (ch === 69 || ch === 101) {
    sawFractionOrExponent = true;
    cursor += 1;
    ch = text.charCodeAt(cursor);

    // + -
    if (ch === 43 || ch === 45) {
      cursor += 1;
      ch = text.charCodeAt(cursor);
    }

    cursor = readDigits(source, cursor, ch);
    ch = text.charCodeAt(cursor);
  }

  // Numbers cannot be followed by . or e
  switch (ch) {
    case 46:
    case 69:
    case 101:
      throw (0, _syntaxError.syntaxError)(source, cursor, "Invalid number, expected digit but got: " + printCharCode(ch) + ".");
  }

  var kind = sawFractionOrExponent ? _tokenKind.TokenKind.FLOAT : _tokenKind.TokenKind.INT;
  return new Tok(kind, start, cursor, line, col, prev, text.slice(start, cursor));
}
/**
 * Consumes a run of one or more decimal digits.
 *
 * @param {Object} source - The Source whose `body` is being lexed.
 * @param {number} start - Offset of the first expected digit.
 * @param {number} firstCode - Character code found at `start`.
 * @returns {number} Offset of the first character after the digits.
 * @throws A syntax error at `start` when `firstCode` is not a digit.
 */
function readDigits(source, start, firstCode) {
  var isDigit = firstCode >= 48 && firstCode <= 57; // 0 - 9

  if (!isDigit) {
    throw (0, _syntaxError.syntaxError)(source, start, "Invalid number, expected digit but got: " + printCharCode(firstCode) + ".");
  }

  var body = source.body;
  var cursor = start + 1;
  var ch = body.charCodeAt(cursor);

  // Reading past the end yields NaN, which fails both comparisons.
  while (ch >= 48 && ch <= 57) {
    ch = body.charCodeAt(++cursor);
  }

  return cursor;
}
/**
 * Lexes a single-line string literal whose opening quote is at `start`.
 *
 * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
 *
 * Escape sequences are decoded into the token's value. Unescaped text is
 * copied in segments: `segmentStart` marks where the current run of literal
 * characters began.
 *
 * @returns {Object} A STRING token ending just after the closing quote.
 * @throws A syntax error for a control character other than tab, for an
 *   invalid escape sequence, or when a line terminator or the end of the
 *   body is reached before the closing quote.
 */
function readString(source, start, line, col, prev) {
  var body = source.body;
  var cursor = start + 1;
  var segmentStart = cursor;
  var decoded = '';

  while (cursor < body.length) {
    var ch = body.charCodeAt(cursor);

    // A LineTerminator stops the scan; the string is then unterminated.
    if (isNaN(ch) || ch === 0x000a || ch === 0x000d) {
      break;
    }

    // Closing Quote (")
    if (ch === 34) {
      decoded += body.slice(segmentStart, cursor);
      return new Tok(_tokenKind.TokenKind.STRING, start, cursor + 1, line, col, prev, decoded);
    }

    // SourceCharacter
    if (ch < 0x0020 && ch !== 0x0009) {
      throw (0, _syntaxError.syntaxError)(source, cursor, "Invalid character within String: " + printCharCode(ch) + ".");
    }

    // Anything other than a backslash is ordinary string content.
    if (ch !== 92) {
      ++cursor;
      continue;
    }

    // \ - flush the literal text seen so far, then decode the escape.
    decoded += body.slice(segmentStart, cursor);
    var escapeAt = cursor + 1;
    var escapeCode = body.charCodeAt(escapeAt);

    switch (escapeCode) {
      case 34:
        decoded += '"';
        break;

      case 47:
        decoded += '/';
        break;

      case 92:
        decoded += '\\';
        break;

      case 98:
        decoded += '\b';
        break;

      case 102:
        decoded += '\f';
        break;

      case 110:
        decoded += '\n';
        break;

      case 114:
        decoded += '\r';
        break;

      case 116:
        decoded += '\t';
        break;

      case 117:
        {
          // uXXXX
          var charCode = uniCharCode(body.charCodeAt(escapeAt + 1), body.charCodeAt(escapeAt + 2), body.charCodeAt(escapeAt + 3), body.charCodeAt(escapeAt + 4));

          if (charCode < 0) {
            var invalidSequence = body.slice(escapeAt + 1, escapeAt + 5);
            throw (0, _syntaxError.syntaxError)(source, escapeAt, "Invalid character escape sequence: \\u" + invalidSequence + ".");
          }

          decoded += String.fromCharCode(charCode);
          // Step over the four hex digits.
          escapeAt += 4;
          break;
        }

      default:
        throw (0, _syntaxError.syntaxError)(source, escapeAt, "Invalid character escape sequence: \\" + String.fromCharCode(escapeCode) + ".");
    }

    // Resume literal copying right after the escape sequence.
    cursor = escapeAt + 1;
    segmentStart = cursor;
  }

  throw (0, _syntaxError.syntaxError)(source, cursor, 'Unterminated string.');
}
/**
 * Lexes a block string whose opening triple quote is at `start`.
 *
 * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
 *
 * Line breaks inside the block string update `lexer.line` and
 * `lexer.lineStart`. The escape `\"""` contributes a literal `"""` to the
 * raw text. The raw text is passed through `dedentBlockStringValue` to
 * produce the token's value.
 *
 * @returns {Object} A BLOCK_STRING token ending after the closing quotes.
 * @throws A syntax error for a control character other than tab, line feed
 *   and carriage return, or when the body ends before the closing quotes.
 */
function readBlockString(source, start, line, col, prev, lexer) {
  var body = source.body;
  var cursor = start + 3;
  var segmentStart = cursor;
  var raw = '';

  while (cursor < body.length) {
    var ch = body.charCodeAt(cursor);

    if (isNaN(ch)) {
      break;
    }

    // Closing Triple-Quote (""")
    if (isTripleQuoteAt(body, cursor)) {
      raw += body.slice(segmentStart, cursor);
      return new Tok(_tokenKind.TokenKind.BLOCK_STRING, start, cursor + 3, line, col, prev, (0, _blockString.dedentBlockStringValue)(raw));
    }

    // SourceCharacter
    if (ch < 0x0020 && ch !== 0x0009 && ch !== 0x000a && ch !== 0x000d) {
      throw (0, _syntaxError.syntaxError)(source, cursor, "Invalid character within String: " + printCharCode(ch) + ".");
    }

    // Escape Triple-Quote (\""")
    if (ch === 92 && isTripleQuoteAt(body, cursor + 1)) {
      raw += body.slice(segmentStart, cursor) + '"""';
      cursor += 4;
      segmentStart = cursor;
      continue;
    }

    // new line | carriage return (CR LF counts as one line break)
    if (ch === 10 || ch === 13) {
      cursor += (ch === 13 && body.charCodeAt(cursor + 1) === 10) ? 2 : 1;
      ++lexer.line;
      lexer.lineStart = cursor;
      continue;
    }

    ++cursor;
  }

  throw (0, _syntaxError.syntaxError)(source, cursor, 'Unterminated string.');
}

/**
 * Reports whether three consecutive double quotes begin at `index` in `body`.
 */
function isTripleQuoteAt(body, index) {
  return body.charCodeAt(index) === 34 && body.charCodeAt(index + 1) === 34 && body.charCodeAt(index + 2) === 34;
}
/**
 * Combines the character codes of four hexadecimal digits into the integer
 * they spell out, most significant digit first. For example the codes for
 * '0','0','0','f' give 15 and those for '0','0','f','f' give 255.
 *
 * Returns a negative number when any of the four is not a hex digit:
 * char2hex() yields -1 for such input, and ORing a negative value into the
 * result keeps the result negative.
 *
 * @param {number} a - Code of the most significant hex digit.
 * @param {number} b - Code of the second hex digit.
 * @param {number} c - Code of the third hex digit.
 * @param {number} d - Code of the least significant hex digit.
 * @returns {number} The combined value, or a negative number on error.
 */
function uniCharCode(a, b, c, d) {
  var nibble3 = char2hex(a) << 12;
  var nibble2 = char2hex(b) << 8;
  var nibble1 = char2hex(c) << 4;
  var nibble0 = char2hex(d);
  return nibble3 | nibble2 | nibble1 | nibble0;
}
/**
 * Maps the character code of a hexadecimal digit to its numeric value.
 * '0'-'9' give 0-9, 'A'-'F' give 10-15, 'a'-'f' give 10-15.
 *
 * @param {number} a - A character code.
 * @returns {number} The digit's value, or -1 when `a` is not a hex digit.
 */
function char2hex(a) {
  // 0-9
  if (a >= 48 && a <= 57) {
    return a - 48;
  }

  // A-F
  if (a >= 65 && a <= 70) {
    return a - 55;
  }

  // a-f
  if (a >= 97 && a <= 102) {
    return a - 87;
  }

  return -1;
}
/**
 * Lexes a name beginning at `start`; the first character is taken as
 * already accepted by the caller and is not re-checked here.
 *
 * [_A-Za-z][_0-9A-Za-z]*
 *
 * @returns {Object} A NAME token whose value is the matched text.
 */
function readName(source, start, line, col, prev) {
  var body = source.body;
  var limit = body.length;
  var cursor = start + 1;

  for (; cursor !== limit; ++cursor) {
    var ch = body.charCodeAt(cursor);
    var isNameContinue = ch === 95 || // _
    ch >= 48 && ch <= 57 || // 0-9
    ch >= 65 && ch <= 90 || // A-Z
    ch >= 97 && ch <= 122; // a-z

    // NaN (reading past the end) fails every comparison above.
    if (!isNameContinue) {
      break;
    }
  }

  return new Tok(_tokenKind.TokenKind.NAME, start, cursor, line, col, prev, body.slice(start, cursor));
}