UNPKG

24.1 kBJavaScriptView Raw
1'use strict';
2
3var cst = require('./cst.js');
4
5/*
6START -> stream
7
8stream
9 directive -> line-end -> stream
10 indent + line-end -> stream
11 [else] -> line-start
12
13line-end
14 comment -> line-end
15 newline -> .
16 input-end -> END
17
18line-start
19 doc-start -> doc
20 doc-end -> stream
21 [else] -> indent -> block-start
22
23block-start
24 seq-item-start -> block-start
25 explicit-key-start -> block-start
26 map-value-start -> block-start
27 [else] -> doc
28
29doc
30 line-end -> line-start
31 spaces -> doc
32 anchor -> doc
33 tag -> doc
34 flow-start -> flow -> doc
35 flow-end -> error -> doc
36 seq-item-start -> error -> doc
37 explicit-key-start -> error -> doc
38 map-value-start -> doc
39 alias -> doc
40 quote-start -> quoted-scalar -> doc
41 block-scalar-header -> line-end -> block-scalar(min) -> line-start
42 [else] -> plain-scalar(false, min) -> doc
43
44flow
45 line-end -> flow
46 spaces -> flow
47 anchor -> flow
48 tag -> flow
49 flow-start -> flow -> flow
50 flow-end -> .
51 seq-item-start -> error -> flow
52 explicit-key-start -> flow
53 map-value-start -> flow
54 alias -> flow
55 quote-start -> quoted-scalar -> flow
56 comma -> flow
57 [else] -> plain-scalar(true, 0) -> flow
58
59quoted-scalar
60 quote-end -> .
61 [else] -> quoted-scalar
62
63block-scalar(min)
64 newline + peek(indent < min) -> .
65 [else] -> block-scalar(min)
66
67plain-scalar(is-flow, min)
68 scalar-end(is-flow) -> .
69 peek(newline + (indent < min)) -> .
  [else] -> plain-scalar(is-flow, min)
71*/
/**
 * Reports whether `ch` counts as "empty" for the lexer: a space, a tab, a
 * line-feed, a carriage return, or `undefined` (what indexing past the end of
 * a string yields). Any other value, including the empty string, is not empty.
 *
 * @param {string | undefined} ch - A single character, or `undefined`.
 * @returns {boolean} `true` if `ch` is one of the values listed above.
 */
function isEmpty(ch) {
  return (
    ch === undefined ||
    ch === ' ' ||
    ch === '\n' ||
    ch === '\r' ||
    ch === '\t'
  );
}
/** Digits accepted after `%` when scanning a percent-escape inside a tag. */
const hexDigits = new Set([...'0123456789', ...'ABCDEF', ...'abcdef']);

/** Characters that may appear unescaped in a (non-verbatim) tag. */
const tagChars = new Set([
  ...'0123456789',
  ...'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
  ...'abcdefghijklmnopqrstuvwxyz',
  ..."-#;/?:@&=+$_.!~*'()",
]);

/** Characters that act as indicators inside a flow collection. */
const flowIndicatorChars = new Set([',', '[', ']', '{', '}']);

/** Characters that terminate an anchor or alias name. */
const invalidAnchorChars = new Set([' ', ',', '[', ']', '{', '}', '\n', '\r', '\t']);

/**
 * Predicate used to find the end of an anchor/alias: true for a missing
 * character (end of buffer) or any member of `invalidAnchorChars`.
 */
const isNotAnchorChar = (ch) => (ch ? invalidAnchorChars.has(ch) : true);
/**
 * Splits an input string into lexical tokens, i.e. smaller strings that are
 * easily identifiable by `tokens.tokenType()`.
 *
 * Lexing starts always in a "stream" context. Incomplete input may be buffered
 * until a complete token can be emitted.
 *
 * In addition to slices of the original input, the following control characters
 * may also be emitted:
 *
 * - `\x02` (Start of Text): A document starts with the next token
 * - `\x18` (Cancel): Unexpected end of flow-mode (indicates an error)
 * - `\x1f` (Unit Separator): Next token is a scalar value
 * - `\u{FEFF}` (Byte order mark): Emitted separately outside documents
 */
class Lexer {
  constructor() {
    /**
     * Flag indicating whether the end of the current buffer marks the end of
     * all input
     */
    this.atEnd = false;
    /**
     * Explicit indent set in block scalar header, as an offset from the current
     * minimum indent, so e.g. set to 1 from a header `|2+`. Set to -1 if not
     * explicitly set.
     */
    this.blockScalarIndent = -1;
    /**
     * Block scalars that include a + (keep) chomping indicator in their header
     * include trailing empty lines, which are otherwise excluded from the
     * scalar's contents.
     */
    this.blockScalarKeep = false;
    /** Current input */
    this.buffer = '';
    /**
     * Flag noting whether the map value indicator : can immediately follow this
     * node within a flow context.
     */
    this.flowKey = false;
    /** Count of surrounding flow collection levels. */
    this.flowLevel = 0;
    /**
     * Minimum level of indentation required for next lines to be parsed as a
     * part of the current scalar value.
     */
    this.indentNext = 0;
    /** Indentation level of the current line. */
    this.indentValue = 0;
    /** Position of the next \n character. */
    this.lineEndPos = null;
    /** Stores the state of the lexer if reaching the end of incomplete input */
    this.next = null;
    /** A pointer to `buffer`; the current position of the lexer. */
    this.pos = 0;
  }

  /**
   * Generate YAML tokens from the `source` string. If `incomplete`,
   * a part of the last line may be left as a buffer for the next call.
   *
   * @param {string} [source] - Text appended to whatever is still buffered.
   * @param {boolean} [incomplete=false] - `true` if more input will follow.
   * @returns A generator of lexical tokens
   * @throws {TypeError} If `source` is truthy but not a string.
   */
  *lex(source, incomplete = false) {
    if (source) {
      if (typeof source !== 'string') {
        throw new TypeError('source is not a string');
      }
      this.buffer = this.buffer ? this.buffer + source : source;
      // The cached newline position refers to the old buffer contents.
      this.lineEndPos = null;
    }
    this.atEnd = !incomplete;
    let next = this.next ?? 'stream';
    while (next && (incomplete || this.hasChars(1))) {
      next = yield* this.parseNext(next);
    }
  }

  /**
   * Whether, after skipping spaces and tabs from the current position, the
   * next character is a `#`, a line break (`\n` or `\r\n`) or the end of the
   * buffer. A `\r` that is not followed by `\n` does not count.
   */
  atLineEnd() {
    let i = this.pos;
    let ch = this.buffer[i];
    while (ch === ' ' || ch === '\t') ch = this.buffer[++i];
    if (!ch || ch === '#' || ch === '\n') return true;
    if (ch === '\r') return this.buffer[i + 1] === '\n';
    return false;
  }

  /** The buffer character `n` positions after the current position. */
  charAt(n) {
    return this.buffer[this.pos + n];
  }

  /**
   * Decide whether a multi-line scalar continues on the line starting at
   * buffer index `offset`.
   *
   * @param {number} offset - Buffer index of the first character of the line.
   * @returns {number} The index from which scanning should continue, or -1 if
   *   the scalar ends before this line.
   */
  continueScalar(offset) {
    let ch = this.buffer[offset];
    if (this.indentNext > 0) {
      let indent = 0;
      while (ch === ' ') ch = this.buffer[++indent + offset];
      if (ch === '\r') {
        const next = this.buffer[indent + offset + 1];
        // Either a CRLF line break, or a CR whose follower is not buffered yet.
        if (next === '\n' || (!next && !this.atEnd)) return offset + indent + 1;
      }
      // Blank lines, sufficiently indented lines and not-yet-available input
      // all keep the scalar open.
      return ch === '\n' || indent >= this.indentNext || (!ch && !this.atEnd)
        ? offset + indent
        : -1;
    }
    if (ch === '-' || ch === '.') {
      // With no required indent, only a document marker ends the scalar.
      const dt = this.buffer.slice(offset, offset + 3);
      if ((dt === '---' || dt === '...') && isEmpty(this.buffer[offset + 3])) {
        return -1;
      }
    }
    return offset;
  }

  /**
   * The text from the current position up to (not including) the next line
   * break; a `\r` directly before the `\n` is excluded too.
   *
   * @returns {string | null} The rest of the line, or `null` when no `\n` is
   *   buffered and more input may still arrive.
   */
  getLine() {
    let end = this.lineEndPos;
    // Recompute if nothing is cached, or the cached newline is behind us.
    if (typeof end !== 'number' || (end !== -1 && end < this.pos)) {
      end = this.buffer.indexOf('\n', this.pos);
      this.lineEndPos = end;
    }
    if (end === -1) return this.atEnd ? this.buffer.substring(this.pos) : null;
    if (this.buffer[end - 1] === '\r') end -= 1;
    return this.buffer.substring(this.pos, end);
  }

  /** Whether at least `n` characters remain from the current position. */
  hasChars(n) {
    return this.pos + n <= this.buffer.length;
  }

  /**
   * Drop the already-consumed part of the buffer and remember `state` as the
   * state to resume from on the next `lex()` call.
   *
   * @returns {null} Always `null`, which makes the `lex()` loop stop.
   */
  setNext(state) {
    this.buffer = this.buffer.substring(this.pos);
    this.pos = 0;
    this.lineEndPos = null;
    this.next = state;
    return null;
  }

  /** Up to `n` characters starting at the current position, not consumed. */
  peek(n) {
    return this.buffer.slice(this.pos, this.pos + n);
  }

  /**
   * Run the parser for the named state.
   *
   * @returns The name of the following state, `null` when more input is
   *   needed, or `undefined` for an unrecognised state name.
   */
  *parseNext(next) {
    switch (next) {
      case 'stream':
        return yield* this.parseStream();
      case 'line-start':
        return yield* this.parseLineStart();
      case 'block-start':
        return yield* this.parseBlockStart();
      case 'doc':
        return yield* this.parseDocument();
      case 'flow':
        return yield* this.parseFlowCollection();
      case 'quoted-scalar':
        return yield* this.parseQuotedScalar();
      case 'block-scalar':
        return yield* this.parseBlockScalar();
      case 'plain-scalar':
        return yield* this.parsePlainScalar();
    }
  }

  /**
   * Stream state: handles a byte order mark, `%` directive lines and
   * blank/comment lines; anything else starts a document.
   */
  *parseStream() {
    let line = this.getLine();
    if (line === null) return this.setNext('stream');
    if (line[0] === cst.BOM) {
      yield* this.pushCount(1);
      line = line.substring(1);
    }
    if (line[0] === '%') {
      let dirEnd = line.length;
      // A `#` only starts a comment when preceded by a space or tab.
      let cs = line.indexOf('#');
      while (cs !== -1) {
        const ch = line[cs - 1];
        if (ch === ' ' || ch === '\t') {
          dirEnd = cs - 1;
          break;
        } else {
          cs = line.indexOf('#', cs + 1);
        }
      }
      // Trim trailing whitespace from the directive itself.
      while (true) {
        const ch = line[dirEnd - 1];
        if (ch === ' ' || ch === '\t') dirEnd -= 1;
        else break;
      }
      const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true));
      yield* this.pushCount(line.length - n); // possible comment
      // pushNewline() is a generator, so it must be delegated to with yield*;
      // a bare call would create and discard it without emitting anything.
      yield* this.pushNewline();
      return 'stream';
    }
    if (this.atLineEnd()) {
      const sp = yield* this.pushSpaces(true);
      yield* this.pushCount(line.length - sp);
      yield* this.pushNewline();
      return 'stream';
    }
    yield cst.DOCUMENT;
    return yield* this.parseLineStart();
  }

  /**
   * Line-start state: recognises `---` / `...` document markers at the start
   * of a line, otherwise records the line's indentation and continues with
   * the block-start state.
   */
  *parseLineStart() {
    const ch = this.charAt(0);
    if (!ch && !this.atEnd) return this.setNext('line-start');
    if (ch === '-' || ch === '.') {
      // Four characters are needed to tell a marker from e.g. `---x`.
      if (!this.atEnd && !this.hasChars(4)) return this.setNext('line-start');
      const s = this.peek(3);
      if ((s === '---' || s === '...') && isEmpty(this.charAt(3))) {
        yield* this.pushCount(3);
        this.indentValue = 0;
        this.indentNext = 0;
        return s === '---' ? 'doc' : 'stream';
      }
    }
    this.indentValue = yield* this.pushSpaces(false);
    if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1))) {
      this.indentNext = this.indentValue;
    }
    return yield* this.parseBlockStart();
  }

  /**
   * Block-start state: consumes any run of `-`, `?` or `:` indicators that are
   * each followed by whitespace, updating the indentation bookkeeping.
   */
  *parseBlockStart() {
    const [ch0, ch1] = this.peek(2);
    if (!ch1 && !this.atEnd) return this.setNext('block-start');
    if ((ch0 === '-' || ch0 === '?' || ch0 === ':') && isEmpty(ch1)) {
      const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true));
      this.indentNext = this.indentValue + 1;
      this.indentValue += n;
      return yield* this.parseBlockStart();
    }
    return 'doc';
  }

  /**
   * Document state: emits leading whitespace and indicators, then dispatches
   * on the next character of the current line.
   */
  *parseDocument() {
    yield* this.pushSpaces(true);
    const line = this.getLine();
    if (line === null) return this.setNext('doc');
    let n = yield* this.pushIndicators();
    switch (line[n]) {
      case '#':
        yield* this.pushCount(line.length - n);
      // fallthrough
      case undefined:
        yield* this.pushNewline();
        return yield* this.parseLineStart();
      case '{':
      case '[':
        yield* this.pushCount(1);
        this.flowKey = false;
        this.flowLevel = 1;
        return 'flow';
      case '}':
      case ']':
        // this is an error
        yield* this.pushCount(1);
        return 'doc';
      case '*':
        yield* this.pushUntil(isNotAnchorChar);
        return 'doc';
      case '"':
      case "'":
        return yield* this.parseQuotedScalar();
      case '|':
      case '>':
        n += yield* this.parseBlockScalarHeader();
        n += yield* this.pushSpaces(true);
        yield* this.pushCount(line.length - n);
        yield* this.pushNewline();
        return yield* this.parseBlockScalar();
      default:
        return yield* this.parsePlainScalar();
    }
  }

  /**
   * Flow state: skips line breaks and whitespace, checks for an unexpected
   * unindent or document marker, then dispatches on the next character.
   */
  *parseFlowCollection() {
    let nl, sp;
    let indent = -1;
    do {
      nl = yield* this.pushNewline();
      if (nl > 0) {
        sp = yield* this.pushSpaces(false);
        this.indentValue = indent = sp;
      } else {
        sp = 0;
      }
      sp += yield* this.pushSpaces(true);
    } while (nl + sp > 0);
    const line = this.getLine();
    if (line === null) return this.setNext('flow');
    if (
      (indent !== -1 && indent < this.indentNext && line[0] !== '#') ||
      (indent === 0 &&
        (line.startsWith('---') || line.startsWith('...')) &&
        isEmpty(line[3]))
    ) {
      // Allowing for the terminal ] or } at the same (rather than greater)
      // indent level as the initial [ or { is technically invalid, but
      // failing here would be surprising to users.
      const atFlowEndMarker =
        indent === this.indentNext - 1 &&
        this.flowLevel === 1 &&
        (line[0] === ']' || line[0] === '}');
      if (!atFlowEndMarker) {
        // this is an error
        this.flowLevel = 0;
        yield cst.FLOW_END;
        return yield* this.parseLineStart();
      }
    }
    let n = 0;
    while (line[n] === ',') {
      n += yield* this.pushCount(1);
      n += yield* this.pushSpaces(true);
      this.flowKey = false;
    }
    n += yield* this.pushIndicators();
    switch (line[n]) {
      case undefined:
        return 'flow';
      case '#':
        yield* this.pushCount(line.length - n);
        return 'flow';
      case '{':
      case '[':
        yield* this.pushCount(1);
        this.flowKey = false;
        this.flowLevel += 1;
        return 'flow';
      case '}':
      case ']':
        yield* this.pushCount(1);
        this.flowKey = true;
        this.flowLevel -= 1;
        return this.flowLevel ? 'flow' : 'doc';
      case '*':
        yield* this.pushUntil(isNotAnchorChar);
        return 'flow';
      case '"':
      case "'":
        this.flowKey = true;
        return yield* this.parseQuotedScalar();
      case ':': {
        const next = this.charAt(1);
        if (this.flowKey || isEmpty(next) || next === ',') {
          this.flowKey = false;
          yield* this.pushCount(1);
          yield* this.pushSpaces(true);
          return 'flow';
        }
      }
      // fallthrough
      default:
        this.flowKey = false;
        return yield* this.parsePlainScalar();
    }
  }

  /**
   * Quoted-scalar state: emits the quoted scalar starting at the current
   * position as one token, including its quotes. The token is cut short at an
   * insufficiently indented line.
   */
  *parseQuotedScalar() {
    const quote = this.charAt(0);
    let end = this.buffer.indexOf(quote, this.pos + 1);
    if (quote === "'") {
      // A doubled '' is an escaped quote, not the end of the scalar.
      while (end !== -1 && this.buffer[end + 1] === "'") {
        end = this.buffer.indexOf("'", end + 2);
      }
    } else {
      // double-quote
      while (end !== -1) {
        // The quote is escaped iff preceded by an odd number of backslashes.
        let n = 0;
        while (this.buffer[end - 1 - n] === '\\') n += 1;
        if (n % 2 === 0) break;
        end = this.buffer.indexOf('"', end + 1);
      }
    }
    // Only looking for newlines within the quotes
    const qb = this.buffer.substring(0, end);
    let nl = qb.indexOf('\n', this.pos);
    if (nl !== -1) {
      while (nl !== -1) {
        const cs = this.continueScalar(nl + 1);
        if (cs === -1) break;
        nl = qb.indexOf('\n', cs);
      }
      if (nl !== -1) {
        // this is an error caused by an unexpected unindent
        end = nl - (qb[nl - 1] === '\r' ? 2 : 1);
      }
    }
    if (end === -1) {
      if (!this.atEnd) return this.setNext('quoted-scalar');
      end = this.buffer.length;
    }
    yield* this.pushToIndex(end + 1, false);
    return this.flowLevel ? 'flow' : 'doc';
  }

  /**
   * Reads the indicators following a `|` or `>` into `blockScalarIndent` and
   * `blockScalarKeep`, then emits the header as one token.
   *
   * @returns The number of characters emitted.
   */
  *parseBlockScalarHeader() {
    this.blockScalarIndent = -1;
    this.blockScalarKeep = false;
    let i = this.pos;
    while (true) {
      const ch = this.buffer[++i];
      if (ch === '+') this.blockScalarKeep = true;
      else if (ch > '0' && ch <= '9') this.blockScalarIndent = Number(ch) - 1;
      else if (ch !== '-') break;
    }
    return yield* this.pushUntil((ch) => isEmpty(ch) || ch === '#');
  }

  /**
   * Block-scalar state: determines the scalar's indentation, finds where its
   * content ends and emits it as one token preceded by the scalar marker.
   */
  *parseBlockScalar() {
    let nl = this.pos - 1; // may be -1 if this.pos === 0
    let indent = 0;
    let ch;
    // Skip leading blank lines, tracking the indent of the first content line.
    loop: for (let i = this.pos; (ch = this.buffer[i]); ++i) {
      switch (ch) {
        case ' ':
          indent += 1;
          break;
        case '\n':
          nl = i;
          indent = 0;
          break;
        case '\r': {
          const next = this.buffer[i + 1];
          if (!next && !this.atEnd) return this.setNext('block-scalar');
          if (next === '\n') break;
        } // fallthrough
        default:
          break loop;
      }
    }
    if (!ch && !this.atEnd) return this.setNext('block-scalar');
    if (indent >= this.indentNext) {
      if (this.blockScalarIndent === -1) {
        this.indentNext = indent;
      } else {
        this.indentNext =
          this.blockScalarIndent + (this.indentNext === 0 ? 1 : this.indentNext);
      }
      do {
        const cs = this.continueScalar(nl + 1);
        if (cs === -1) break;
        nl = this.buffer.indexOf('\n', cs);
      } while (nl !== -1);
      if (nl === -1) {
        if (!this.atEnd) return this.setNext('block-scalar');
        nl = this.buffer.length;
      }
    }
    // Trailing insufficiently indented tabs are invalid.
    // To catch that during parsing, we include them in the block scalar value.
    let i = nl + 1;
    ch = this.buffer[i];
    while (ch === ' ') ch = this.buffer[++i];
    if (ch === '\t') {
      while (ch === '\t' || ch === ' ' || ch === '\r' || ch === '\n') {
        ch = this.buffer[++i];
      }
      nl = i - 1;
    } else if (!this.blockScalarKeep) {
      // Without the keep indicator, walk back over trailing space-only lines.
      do {
        let j = nl - 1;
        let prev = this.buffer[j];
        if (prev === '\r') prev = this.buffer[--j];
        const lastChar = j; // Drop the line if last char not more indented
        while (prev === ' ') prev = this.buffer[--j];
        if (prev === '\n' && j >= this.pos && j + 1 + indent > lastChar) nl = j;
        else break;
      } while (true);
    }
    yield cst.SCALAR;
    yield* this.pushToIndex(nl + 1, true);
    return yield* this.parseLineStart();
  }

  /**
   * Plain-scalar state: scans to the end of an unquoted scalar, possibly
   * across lines, and emits it as one token preceded by the scalar marker.
   * Trailing whitespace is not part of the token.
   */
  *parsePlainScalar() {
    const inFlow = this.flowLevel > 0;
    let end = this.pos - 1; // index of the last character kept in the scalar
    let i = this.pos - 1;
    let ch;
    while ((ch = this.buffer[++i])) {
      if (ch === ':') {
        const next = this.buffer[i + 1];
        if (isEmpty(next) || (inFlow && flowIndicatorChars.has(next))) break;
        end = i;
      } else if (isEmpty(ch)) {
        let next = this.buffer[i + 1];
        if (ch === '\r') {
          if (next === '\n') {
            i += 1;
            ch = '\n';
            next = this.buffer[i + 1];
          } else {
            end = i;
          }
        }
        if (next === '#' || (inFlow && flowIndicatorChars.has(next))) break;
        if (ch === '\n') {
          const cs = this.continueScalar(i + 1);
          if (cs === -1) break;
          i = Math.max(i, cs - 2); // to advance, but still account for ' #'
        }
      } else {
        if (inFlow && flowIndicatorChars.has(ch)) break;
        end = i;
      }
    }
    if (!ch && !this.atEnd) return this.setNext('plain-scalar');
    yield cst.SCALAR;
    yield* this.pushToIndex(end + 1, true);
    return inFlow ? 'flow' : 'doc';
  }

  /**
   * Emit the next `n` characters as one token and advance past them.
   *
   * @returns {number} `n`, or 0 if `n` is not positive (nothing is emitted).
   */
  *pushCount(n) {
    if (n > 0) {
      yield this.buffer.slice(this.pos, this.pos + n);
      this.pos += n;
      return n;
    }
    return 0;
  }

  /**
   * Emit everything from the current position up to (not including) buffer
   * index `i` as one token and advance past it.
   *
   * @param {number} i - End index (exclusive).
   * @param {boolean} allowEmpty - If true, an empty range emits `''`.
   * @returns {number} The number of characters consumed.
   */
  *pushToIndex(i, allowEmpty) {
    const s = this.buffer.slice(this.pos, i);
    if (s) {
      yield s;
      this.pos += s.length;
      return s.length;
    } else if (allowEmpty) {
      yield '';
    }
    return 0;
  }

  /**
   * Emit any run of tags, anchors and `-` / `?` / `:` indicators at the
   * current position, each with its trailing whitespace.
   *
   * @returns The total number of characters consumed.
   */
  *pushIndicators() {
    switch (this.charAt(0)) {
      case '!':
        return (
          (yield* this.pushTag()) +
          (yield* this.pushSpaces(true)) +
          (yield* this.pushIndicators())
        );
      case '&':
        return (
          (yield* this.pushUntil(isNotAnchorChar)) +
          (yield* this.pushSpaces(true)) +
          (yield* this.pushIndicators())
        );
      case '-': // this is an error
      case '?': // this is an error outside flow collections
      case ':': {
        const inFlow = this.flowLevel > 0;
        const ch1 = this.charAt(1);
        if (isEmpty(ch1) || (inFlow && flowIndicatorChars.has(ch1))) {
          if (!inFlow) this.indentNext = this.indentValue + 1;
          else if (this.flowKey) this.flowKey = false;
          return (
            (yield* this.pushCount(1)) +
            (yield* this.pushSpaces(true)) +
            (yield* this.pushIndicators())
          );
        }
      }
    }
    return 0;
  }

  /**
   * Emit the tag starting at the current `!`: either the verbatim `!<...>`
   * form, or a run of tag characters and `%xx` escapes.
   *
   * @returns The number of characters consumed.
   */
  *pushTag() {
    if (this.charAt(1) === '<') {
      let i = this.pos + 2;
      let ch = this.buffer[i];
      while (!isEmpty(ch) && ch !== '>') ch = this.buffer[++i];
      return yield* this.pushToIndex(ch === '>' ? i + 1 : i, false);
    } else {
      let i = this.pos + 1;
      let ch = this.buffer[i];
      while (ch) {
        if (tagChars.has(ch)) {
          ch = this.buffer[++i];
        } else if (
          ch === '%' &&
          hexDigits.has(this.buffer[i + 1]) &&
          hexDigits.has(this.buffer[i + 2])
        ) {
          ch = this.buffer[(i += 3)];
        } else {
          break;
        }
      }
      return yield* this.pushToIndex(i, false);
    }
  }

  /**
   * Emit a `\n` or `\r\n` found at the current position.
   *
   * @returns {number} The number of characters consumed (0, 1 or 2).
   */
  *pushNewline() {
    const ch = this.buffer[this.pos];
    if (ch === '\n') return yield* this.pushCount(1);
    else if (ch === '\r' && this.charAt(1) === '\n') return yield* this.pushCount(2);
    else return 0;
  }

  /**
   * Emit the run of spaces (and tabs, if `allowTabs`) at the current position
   * as one token.
   *
   * @returns {number} The length of the run; nothing is emitted when it is 0.
   */
  *pushSpaces(allowTabs) {
    let i = this.pos - 1;
    let ch;
    do {
      ch = this.buffer[++i];
    } while (ch === ' ' || (allowTabs && ch === '\t'));
    const n = i - this.pos;
    if (n > 0) {
      yield this.buffer.slice(this.pos, i);
      this.pos = i;
    }
    return n;
  }

  /**
   * Emit everything from the current position up to the first character for
   * which `test` returns true. `test` also receives `undefined` at the end of
   * the buffer and must accept it for the scan to stop there.
   *
   * @returns The number of characters consumed.
   */
  *pushUntil(test) {
    let i = this.pos;
    let ch = this.buffer[i];
    while (!test(ch)) ch = this.buffer[++i];
    return yield* this.pushToIndex(i, false);
  }
}
718
719exports.Lexer = Lexer;