1 | 'use strict';
|
2 |
|
3 | var cst = require('./cst.js');
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
57 |
|
58 |
|
59 |
|
60 |
|
61 |
|
62 |
|
63 |
|
64 |
|
65 |
|
66 |
|
67 |
|
68 |
|
69 |
|
70 |
|
71 |
|
/**
 * Tells whether `ch` counts as "empty" for the lexer: either there is no
 * character at all (`undefined`, i.e. reading past the end of the buffer) or
 * it is a space, line feed, carriage return or tab.
 *
 * @param {string | undefined} ch - A single character, or `undefined`.
 * @returns {boolean} `true` for `undefined`, `' '`, `'\n'`, `'\r'`, `'\t'`.
 */
function isEmpty(ch) {
    return (ch === undefined ||
        ch === ' ' ||
        ch === '\n' ||
        ch === '\r' ||
        ch === '\t');
}
|
84 | const hexDigits = new Set('0123456789ABCDEFabcdef');
|
85 | const tagChars = new Set("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-#;/?:@&=+$_.!~*'()");
|
86 | const flowIndicatorChars = new Set(',[]{}');
|
87 | const invalidAnchorChars = new Set(' ,[]{}\n\r\t');
|
88 | const isNotAnchorChar = (ch) => !ch || invalidAnchorChars.has(ch);
|
89 |
|
90 |
|
91 |
|
92 |
|
93 |
|
94 |
|
95 |
|
96 |
|
97 |
|
98 |
|
99 |
|
100 |
|
101 |
|
102 |
|
103 |
|
/**
 * Splits YAML source text into a flat stream of string tokens.
 *
 * Every character of the input is emitted in exactly one token, so joining
 * the tokens reproduces the source. In addition to source slices, the lexer
 * emits the single-character control tokens `cst.DOCUMENT`, `cst.SCALAR` and
 * `cst.FLOW_END` to mark structure.
 *
 * The lexer is a state machine: each `parse*` generator yields tokens and
 * returns the name of the next state (or `null` when more input is needed).
 * Input may be supplied in chunks via repeated `lex(chunk, true)` calls.
 */
class Lexer {
    constructor() {
        /** True when the end of `buffer` is the end of all input. */
        this.atEnd = false;
        /**
         * Explicit indentation indicator read from a block scalar header,
         * stored as (digit - 1); -1 when the header has no digit.
         */
        this.blockScalarIndent = -1;
        /** True when the block scalar header contains a `+` indicator. */
        this.blockScalarKeep = false;
        /** Input that has been received but not yet discarded. */
        this.buffer = '';
        /**
         * Within a flow collection: set after a quoted scalar or a closing
         * bracket, so that an immediately following `:` is lexed as an
         * indicator rather than as the start of a plain scalar.
         */
        this.flowKey = false;
        /** Nesting depth of flow collections (`[`/`{`) at the current position. */
        this.flowLevel = 0;
        /**
         * Minimum indentation a following line needs in order to continue the
         * current scalar (see `continueScalar`).
         */
        this.indentNext = 0;
        /** Indentation of the current line, including block indicators. */
        this.indentValue = 0;
        /** Cached index of the next `\n` at/after `pos`; `null` if unknown, -1 if none. */
        this.lineEndPos = null;
        /** State to resume from when `lex()` is called again after running out of input. */
        this.next = null;
        /** Index into `buffer` of the next character to be emitted. */
        this.pos = 0;
    }
    /**
     * Generate YAML tokens from the source string.
     *
     * @param {string} [source] - Text to append to the internal buffer.
     * @param {boolean} [incomplete=false] - When true, more input is expected;
     *   an unfinished trailing part is kept in the buffer for the next call.
     * @returns {Generator<string, void>} Generator of lexical tokens.
     * @throws {TypeError} If `source` is truthy but not a string.
     */
    *lex(source, incomplete = false) {
        if (source) {
            if (typeof source !== 'string')
                throw new TypeError('source is not a string');
            this.buffer = this.buffer ? this.buffer + source : source;
            this.lineEndPos = null;
        }
        this.atEnd = !incomplete;
        let next = this.next ?? 'stream';
        while (next && (incomplete || this.hasChars(1)))
            next = yield* this.parseNext(next);
    }
    /**
     * True if only spaces/tabs, a `#` comment, a line break or the end of the
     * buffer follow the current position on this line.
     */
    atLineEnd() {
        let i = this.pos;
        let ch = this.buffer[i];
        while (ch === ' ' || ch === '\t')
            ch = this.buffer[++i];
        if (!ch || ch === '#' || ch === '\n')
            return true;
        // A carriage return only counts as part of a CRLF pair.
        if (ch === '\r')
            return this.buffer[i + 1] === '\n';
        return false;
    }
    /** Character `n` positions after the current one, or `undefined`. */
    charAt(n) {
        return this.buffer[this.pos + n];
    }
    /**
     * Decides whether the line starting at buffer index `offset` continues
     * the current scalar.
     *
     * @param {number} offset - Index of the first character of the line.
     * @returns {number} Index at which the scalar content continues, or -1
     *   if the line does not belong to the scalar.
     */
    continueScalar(offset) {
        let ch = this.buffer[offset];
        if (this.indentNext > 0) {
            let indent = 0;
            while (ch === ' ')
                ch = this.buffer[++indent + offset];
            if (ch === '\r') {
                const next = this.buffer[indent + offset + 1];
                if (next === '\n' || (!next && !this.atEnd))
                    return offset + indent + 1;
            }
            // Blank lines, sufficiently indented lines, and a not-yet-complete
            // buffer all count as continuation.
            return ch === '\n' || indent >= this.indentNext || (!ch && !this.atEnd)
                ? offset + indent
                : -1;
        }
        // With no required indent, only a document marker ends the scalar.
        if (ch === '-' || ch === '.') {
            const dt = this.buffer.substr(offset, 3);
            if ((dt === '---' || dt === '...') && isEmpty(this.buffer[offset + 3]))
                return -1;
        }
        return offset;
    }
    /**
     * Returns the rest of the current line, excluding its line break.
     *
     * @returns {string | null} The line contents, or `null` when no line
     *   break has been buffered yet and more input is expected.
     */
    getLine() {
        let end = this.lineEndPos;
        // Refresh the cached line end if it is unknown or already behind us.
        if (typeof end !== 'number' || (end !== -1 && end < this.pos)) {
            end = this.buffer.indexOf('\n', this.pos);
            this.lineEndPos = end;
        }
        if (end === -1)
            return this.atEnd ? this.buffer.substring(this.pos) : null;
        if (this.buffer[end - 1] === '\r')
            end -= 1;
        return this.buffer.substring(this.pos, end);
    }
    /** True if at least `n` characters remain in the buffer from `pos`. */
    hasChars(n) {
        return this.pos + n <= this.buffer.length;
    }
    /**
     * Discards the already-emitted part of the buffer and records `state` as
     * the state to resume from on the next `lex()` call.
     *
     * @param {string} state - Name of the state to resume in.
     * @returns {null} Always `null`, which stops the `lex()` loop.
     */
    setNext(state) {
        this.buffer = this.buffer.substring(this.pos);
        this.pos = 0;
        this.lineEndPos = null;
        this.next = state;
        return null;
    }
    /** Up to `n` characters starting at the current position. */
    peek(n) {
        return this.buffer.substr(this.pos, n);
    }
    /**
     * Dispatches to the generator implementing state `next`.
     *
     * @param {string} next - State name.
     * @returns The next state name, `null` if more input is needed, or
     *   `undefined` for an unrecognised state name.
     */
    *parseNext(next) {
        switch (next) {
            case 'stream':
                return yield* this.parseStream();
            case 'line-start':
                return yield* this.parseLineStart();
            case 'block-start':
                return yield* this.parseBlockStart();
            case 'doc':
                return yield* this.parseDocument();
            case 'flow':
                return yield* this.parseFlowCollection();
            case 'quoted-scalar':
                return yield* this.parseQuotedScalar();
            case 'block-scalar':
                return yield* this.parseBlockScalar();
            case 'plain-scalar':
                return yield* this.parsePlainScalar();
        }
    }
    /**
     * State 'stream': lexes one line outside of any document. Handles a
     * leading BOM, `%` directive lines, and empty/comment-only lines;
     * anything else starts a document.
     */
    *parseStream() {
        let line = this.getLine();
        if (line === null)
            return this.setNext('stream');
        if (line[0] === cst.BOM) {
            yield* this.pushCount(1);
            line = line.substring(1);
        }
        if (line[0] === '%') {
            let dirEnd = line.length;
            // A `#` starts a comment only if preceded by a space or tab.
            let cs = line.indexOf('#');
            while (cs !== -1) {
                const ch = line[cs - 1];
                if (ch === ' ' || ch === '\t') {
                    dirEnd = cs - 1;
                    break;
                }
                else {
                    cs = line.indexOf('#', cs + 1);
                }
            }
            // Exclude trailing whitespace from the directive token.
            while (true) {
                const ch = line[dirEnd - 1];
                if (ch === ' ' || ch === '\t')
                    dirEnd -= 1;
                else
                    break;
            }
            const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true));
            yield* this.pushCount(line.length - n); // possible comment
            // pushNewline() is a generator: it must be delegated to with
            // `yield*`, otherwise the call has no effect at all.
            yield* this.pushNewline();
            return 'stream';
        }
        if (this.atLineEnd()) {
            const sp = yield* this.pushSpaces(true);
            yield* this.pushCount(line.length - sp);
            yield* this.pushNewline();
            return 'stream';
        }
        yield cst.DOCUMENT;
        return yield* this.parseLineStart();
    }
    /**
     * State 'line-start': lexes the beginning of a line inside a document,
     * recognising `---` / `...` markers and measuring indentation.
     */
    *parseLineStart() {
        const ch = this.charAt(0);
        if (!ch && !this.atEnd)
            return this.setNext('line-start');
        if (ch === '-' || ch === '.') {
            // Need the marker plus the following character to decide.
            if (!this.atEnd && !this.hasChars(4))
                return this.setNext('line-start');
            const s = this.peek(3);
            if ((s === '---' || s === '...') && isEmpty(this.charAt(3))) {
                yield* this.pushCount(3);
                this.indentValue = 0;
                this.indentNext = 0;
                return s === '---' ? 'doc' : 'stream';
            }
        }
        this.indentValue = yield* this.pushSpaces(false);
        if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1)))
            this.indentNext = this.indentValue;
        return yield* this.parseBlockStart();
    }
    /**
     * State 'block-start': consumes any leading `-`, `?` or `:` block
     * indicators (each followed by whitespace or end of input), updating the
     * indentation bookkeeping, then hands over to the 'doc' state.
     */
    *parseBlockStart() {
        const [ch0, ch1] = this.peek(2);
        if (!ch1 && !this.atEnd)
            return this.setNext('block-start');
        if ((ch0 === '-' || ch0 === '?' || ch0 === ':') && isEmpty(ch1)) {
            const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true));
            this.indentNext = this.indentValue + 1;
            this.indentValue += n;
            return yield* this.parseBlockStart();
        }
        return 'doc';
    }
    /**
     * State 'doc': lexes the next node or indicator on the current line in
     * block context.
     */
    *parseDocument() {
        yield* this.pushSpaces(true);
        const line = this.getLine();
        if (line === null)
            return this.setNext('doc');
        let n = yield* this.pushIndicators();
        switch (line[n]) {
            case '#':
                yield* this.pushCount(line.length - n);
            // fallthrough
            case undefined:
                yield* this.pushNewline();
                return yield* this.parseLineStart();
            case '{':
            case '[':
                yield* this.pushCount(1);
                this.flowKey = false;
                this.flowLevel = 1;
                return 'flow';
            case '}':
            case ']':
                // A flow-end indicator outside of a flow collection is still
                // emitted as its own token.
                yield* this.pushCount(1);
                return 'doc';
            case '*':
                yield* this.pushUntil(isNotAnchorChar);
                return 'doc';
            case '"':
            case "'":
                return yield* this.parseQuotedScalar();
            case '|':
            case '>':
                n += yield* this.parseBlockScalarHeader();
                n += yield* this.pushSpaces(true);
                yield* this.pushCount(line.length - n);
                yield* this.pushNewline();
                return yield* this.parseBlockScalar();
            default:
                return yield* this.parsePlainScalar();
        }
    }
    /**
     * State 'flow': lexes the next item inside a flow collection. May end
     * the collection early (emitting `cst.FLOW_END`) when a line is
     * insufficiently indented or a document marker is found.
     */
    *parseFlowCollection() {
        let nl, sp;
        let indent = -1;
        // Skip over line breaks and whitespace, tracking the indentation of
        // the last line started.
        do {
            nl = yield* this.pushNewline();
            if (nl > 0) {
                sp = yield* this.pushSpaces(false);
                this.indentValue = indent = sp;
            }
            else {
                sp = 0;
            }
            sp += yield* this.pushSpaces(true);
        } while (nl + sp > 0);
        const line = this.getLine();
        if (line === null)
            return this.setNext('flow');
        if ((indent !== -1 && indent < this.indentNext && line[0] !== '#') ||
            (indent === 0 &&
                (line.startsWith('---') || line.startsWith('...')) &&
                isEmpty(line[3]))) {
            // Tolerate the closing `]` or `}` of the outermost collection
            // sitting exactly one level short of the required indentation.
            const atFlowEndMarker = indent === this.indentNext - 1 &&
                this.flowLevel === 1 &&
                (line[0] === ']' || line[0] === '}');
            if (!atFlowEndMarker) {
                // Otherwise the flow collection is terminated here.
                this.flowLevel = 0;
                yield cst.FLOW_END;
                return yield* this.parseLineStart();
            }
        }
        let n = 0;
        while (line[n] === ',') {
            n += yield* this.pushCount(1);
            n += yield* this.pushSpaces(true);
            this.flowKey = false;
        }
        n += yield* this.pushIndicators();
        switch (line[n]) {
            case undefined:
                return 'flow';
            case '#':
                yield* this.pushCount(line.length - n);
                return 'flow';
            case '{':
            case '[':
                yield* this.pushCount(1);
                this.flowKey = false;
                this.flowLevel += 1;
                return 'flow';
            case '}':
            case ']':
                yield* this.pushCount(1);
                this.flowKey = true;
                this.flowLevel -= 1;
                return this.flowLevel ? 'flow' : 'doc';
            case '*':
                yield* this.pushUntil(isNotAnchorChar);
                return 'flow';
            case '"':
            case "'":
                this.flowKey = true;
                return yield* this.parseQuotedScalar();
            case ':': {
                const next = this.charAt(1);
                if (this.flowKey || isEmpty(next) || next === ',') {
                    this.flowKey = false;
                    yield* this.pushCount(1);
                    yield* this.pushSpaces(true);
                    return 'flow';
                }
            }
            // fallthrough: a `:` that is not an indicator starts a plain scalar
            default:
                this.flowKey = false;
                return yield* this.parsePlainScalar();
        }
    }
    /**
     * State 'quoted-scalar': emits a single- or double-quoted scalar,
     * including its quotes, as one token.
     */
    *parseQuotedScalar() {
        const quote = this.charAt(0);
        let end = this.buffer.indexOf(quote, this.pos + 1);
        if (quote === "'") {
            // A doubled '' is an escaped quote, not the end of the scalar.
            while (end !== -1 && this.buffer[end + 1] === "'")
                end = this.buffer.indexOf("'", end + 2);
        }
        else {
            // double-quote: a quote preceded by an odd number of backslashes
            // is escaped.
            while (end !== -1) {
                let n = 0;
                while (this.buffer[end - 1 - n] === '\\')
                    n += 1;
                if (n % 2 === 0)
                    break;
                end = this.buffer.indexOf('"', end + 1);
            }
        }
        // Only look for line breaks before the closing quote.
        const qb = this.buffer.substring(0, end);
        let nl = qb.indexOf('\n', this.pos);
        if (nl !== -1) {
            while (nl !== -1) {
                const cs = this.continueScalar(nl + 1);
                if (cs === -1)
                    break;
                nl = qb.indexOf('\n', cs);
            }
            if (nl !== -1) {
                // A following line does not continue the scalar: cut the
                // token off before that line break.
                end = nl - (qb[nl - 1] === '\r' ? 2 : 1);
            }
        }
        if (end === -1) {
            if (!this.atEnd)
                return this.setNext('quoted-scalar');
            end = this.buffer.length;
        }
        yield* this.pushToIndex(end + 1, false);
        return this.flowLevel ? 'flow' : 'doc';
    }
    /**
     * Reads the indicators following a `|` or `>` (updating
     * `blockScalarIndent` and `blockScalarKeep`) and emits the header token.
     *
     * @returns {number} Length of the emitted header token.
     */
    *parseBlockScalarHeader() {
        this.blockScalarIndent = -1;
        this.blockScalarKeep = false;
        let i = this.pos;
        while (true) {
            const ch = this.buffer[++i];
            if (ch === '+')
                this.blockScalarKeep = true;
            else if (ch > '0' && ch <= '9')
                this.blockScalarIndent = Number(ch) - 1;
            else if (ch !== '-')
                break;
        }
        return yield* this.pushUntil(ch => isEmpty(ch) || ch === '#');
    }
    /**
     * State 'block-scalar': emits `cst.SCALAR` followed by the body of a
     * block scalar as a single (possibly empty) token.
     */
    *parseBlockScalar() {
        let nl = this.pos - 1; // may be -1 if this.pos === 0
        let indent = 0;
        let ch;
        // Skip leading blank lines, measuring the indent of the first
        // line that has content.
        loop: for (let i = this.pos; (ch = this.buffer[i]); ++i) {
            switch (ch) {
                case ' ':
                    indent += 1;
                    break;
                case '\n':
                    nl = i;
                    indent = 0;
                    break;
                case '\r': {
                    const next = this.buffer[i + 1];
                    if (!next && !this.atEnd)
                        return this.setNext('block-scalar');
                    if (next === '\n')
                        break;
                } // fallthrough
                default:
                    break loop;
            }
        }
        if (!ch && !this.atEnd)
            return this.setNext('block-scalar');
        if (indent >= this.indentNext) {
            if (this.blockScalarIndent === -1)
                this.indentNext = indent;
            else {
                this.indentNext =
                    this.blockScalarIndent + (this.indentNext === 0 ? 1 : this.indentNext);
            }
            // Advance line by line for as long as lines continue the scalar.
            do {
                const cs = this.continueScalar(nl + 1);
                if (cs === -1)
                    break;
                nl = this.buffer.indexOf('\n', cs);
            } while (nl !== -1);
            if (nl === -1) {
                if (!this.atEnd)
                    return this.setNext('block-scalar');
                nl = this.buffer.length;
            }
        }
        // If the line after the scalar starts with spaces followed by a tab,
        // the following whitespace (including line breaks) is included in
        // the scalar token.
        let i = nl + 1;
        ch = this.buffer[i];
        while (ch === ' ')
            ch = this.buffer[++i];
        if (ch === '\t') {
            while (ch === '\t' || ch === ' ' || ch === '\r' || ch === '\n')
                ch = this.buffer[++i];
            nl = i - 1;
        }
        else if (!this.blockScalarKeep) {
            // Without `+`, step back over trailing lines that contain only
            // spaces so that they are left out of the scalar token.
            do {
                let i = nl - 1;
                let ch = this.buffer[i];
                if (ch === '\r')
                    ch = this.buffer[--i];
                const lastChar = i; // Drop the line if last char not more indented
                while (ch === ' ')
                    ch = this.buffer[--i];
                if (ch === '\n' && i >= this.pos && i + 1 + indent > lastChar)
                    nl = i;
                else
                    break;
            } while (true);
        }
        yield cst.SCALAR;
        yield* this.pushToIndex(nl + 1, true);
        return yield* this.parseLineStart();
    }
    /**
     * State 'plain-scalar': emits `cst.SCALAR` followed by an unquoted
     * scalar, which may span several lines. Trailing whitespace is not
     * included in the token.
     */
    *parsePlainScalar() {
        const inFlow = this.flowLevel > 0;
        // `end` tracks the index of the last non-whitespace scalar character.
        let end = this.pos - 1;
        let i = this.pos - 1;
        let ch;
        while ((ch = this.buffer[++i])) {
            if (ch === ':') {
                const next = this.buffer[i + 1];
                // `:` followed by whitespace (or a flow indicator, in flow
                // context) ends the scalar.
                if (isEmpty(next) || (inFlow && flowIndicatorChars.has(next)))
                    break;
                end = i;
            }
            else if (isEmpty(ch)) {
                let next = this.buffer[i + 1];
                if (ch === '\r') {
                    if (next === '\n') {
                        // Treat CRLF as a single line break.
                        i += 1;
                        ch = '\n';
                        next = this.buffer[i + 1];
                    }
                    else
                        end = i;
                }
                // Whitespace followed by `#` starts a comment.
                if (next === '#' || (inFlow && flowIndicatorChars.has(next)))
                    break;
                if (ch === '\n') {
                    const cs = this.continueScalar(i + 1);
                    if (cs === -1)
                        break;
                    i = Math.max(i, cs - 2); // to advance, but still account for ' #'
                }
            }
            else {
                if (inFlow && flowIndicatorChars.has(ch))
                    break;
                end = i;
            }
        }
        if (!ch && !this.atEnd)
            return this.setNext('plain-scalar');
        yield cst.SCALAR;
        yield* this.pushToIndex(end + 1, true);
        return inFlow ? 'flow' : 'doc';
    }
    /**
     * Emits the next `n` characters as one token and advances past them.
     *
     * @param {number} n - Number of characters; nothing is emitted if `n <= 0`.
     * @returns {number} `n` if a token was emitted, otherwise 0.
     */
    *pushCount(n) {
        if (n > 0) {
            yield this.buffer.substr(this.pos, n);
            this.pos += n;
            return n;
        }
        return 0;
    }
    /**
     * Emits the characters from the current position up to (not including)
     * buffer index `i` as one token and advances past them.
     *
     * @param {number} i - End index (exclusive).
     * @param {boolean} allowEmpty - If true, an empty range still emits `''`.
     * @returns {number} Length of the non-empty token emitted, else 0.
     */
    *pushToIndex(i, allowEmpty) {
        const s = this.buffer.slice(this.pos, i);
        if (s) {
            yield s;
            this.pos += s.length;
            return s.length;
        }
        else if (allowEmpty)
            yield '';
        return 0;
    }
    /**
     * Emits any run of tags (`!`), anchors (`&`) and `-` / `?` / `:`
     * indicators at the current position, each followed by its whitespace.
     *
     * @returns {number} Total number of characters consumed.
     */
    *pushIndicators() {
        switch (this.charAt(0)) {
            case '!':
                return ((yield* this.pushTag()) +
                    (yield* this.pushSpaces(true)) +
                    (yield* this.pushIndicators()));
            case '&':
                return ((yield* this.pushUntil(isNotAnchorChar)) +
                    (yield* this.pushSpaces(true)) +
                    (yield* this.pushIndicators()));
            case '-': // this is an error
            case '?': // this is an error outside flow collections
            case ':': {
                const inFlow = this.flowLevel > 0;
                const ch1 = this.charAt(1);
                if (isEmpty(ch1) || (inFlow && flowIndicatorChars.has(ch1))) {
                    if (!inFlow)
                        this.indentNext = this.indentValue + 1;
                    else if (this.flowKey)
                        this.flowKey = false;
                    return ((yield* this.pushCount(1)) +
                        (yield* this.pushSpaces(true)) +
                        (yield* this.pushIndicators()));
                }
            }
        }
        return 0;
    }
    /**
     * Emits a tag starting at the current `!`: either a verbatim `!<...>`
     * tag, or a run of tag characters and `%XX` hex escapes.
     *
     * @returns {number} Length of the emitted token.
     */
    *pushTag() {
        if (this.charAt(1) === '<') {
            let i = this.pos + 2;
            let ch = this.buffer[i];
            while (!isEmpty(ch) && ch !== '>')
                ch = this.buffer[++i];
            // Include the closing `>` when there is one.
            return yield* this.pushToIndex(ch === '>' ? i + 1 : i, false);
        }
        else {
            let i = this.pos + 1;
            let ch = this.buffer[i];
            while (ch) {
                if (tagChars.has(ch))
                    ch = this.buffer[++i];
                else if (ch === '%' &&
                    hexDigits.has(this.buffer[i + 1]) &&
                    hexDigits.has(this.buffer[i + 2])) {
                    ch = this.buffer[(i += 3)];
                }
                else
                    break;
            }
            return yield* this.pushToIndex(i, false);
        }
    }
    /**
     * Emits a `\n` or `\r\n` at the current position, if there is one.
     *
     * @returns {number} Number of characters emitted (0, 1 or 2).
     */
    *pushNewline() {
        const ch = this.buffer[this.pos];
        if (ch === '\n')
            return yield* this.pushCount(1);
        else if (ch === '\r' && this.charAt(1) === '\n')
            return yield* this.pushCount(2);
        else
            return 0;
    }
    /**
     * Emits the run of spaces (and tabs, if `allowTabs`) at the current
     * position as one token.
     *
     * @param {boolean} allowTabs - Whether tabs are part of the run.
     * @returns {number} Length of the run (0 if nothing was emitted).
     */
    *pushSpaces(allowTabs) {
        let i = this.pos - 1;
        let ch;
        do {
            ch = this.buffer[++i];
        } while (ch === ' ' || (allowTabs && ch === '\t'));
        const n = i - this.pos;
        if (n > 0) {
            yield this.buffer.substr(this.pos, n);
            this.pos = i;
        }
        return n;
    }
    /**
     * Emits characters from the current position up to the first one for
     * which `test` returns true. `test` is also called with `undefined`
     * at the end of the buffer and must return true for it.
     *
     * @param {(ch: string | undefined) => boolean} test - Stop predicate.
     * @returns {number} Length of the emitted token.
     */
    *pushUntil(test) {
        let i = this.pos;
        let ch = this.buffer[i];
        while (!test(ch))
            ch = this.buffer[++i];
        return yield* this.pushToIndex(i, false);
    }
}
|
718 |
|
719 | exports.Lexer = Lexer;
|