1 | const TokenBuffer = require('./TokenBuffer')
|
2 | const NodeDefinition = require('./NodeDefinition')
|
3 | const NodeMapping = require('./NodeMapping')
|
4 |
|
/**
 * Tokenizer and node resolver driven by a matcher configuration.
 *
 * The parser turns a source string into tokens (kept in `this.tokenBuffer`)
 * and resolves those tokens into node instances loaded from
 * `<confDir>/nodes/<name>.js`.
 *
 * NOTE(review): `tokenBuffer` is used here like an array (`length`, `[0]`,
 * `push`, `shift`, `slice`) with an additional `getIndention()` method —
 * confirm against the TokenBuffer implementation.
 */
class Parser {
  /**
   * @param {object} opts
   * @param {number} [opts.indentionSize=2] Characters per indention level.
   * @param {string} opts.confDir Directory handed to NodeDefinition and
   *   NodeMapping and used to load node classes from `<confDir>/nodes/`.
   * @param {object[]} opts.matcher Matcher configuration, see createMatcher().
   * @param {string[]} opts.keyWords Identifier values treated as keywords.
   * @param {object} opts.scopeDelimiter Lookup from an opening scope
   *   punctuator to its closing punctuator (used by walkScope()).
   * @param {string} [opts.filename] File name mentioned in syntax errors.
   * @throws {Error} If confDir, matcher, keyWords or scopeDelimiter is not set.
   */
  constructor (opts = {}) {
    this.indentionSize = opts.indentionSize || 2
    this.confDir = opts.confDir
    this.matcherConf = opts.matcher
    this.keyWords = opts.keyWords
    this.scopeDelimiter = opts.scopeDelimiter
    this.filename = opts.filename || null

    // Checked in this order so the first missing option is the one reported.
    for (const name of ['confDir', 'matcher', 'keyWords', 'scopeDelimiter']) {
      if (!opts[name]) {
        throw new Error(`The Parser.${name} parameter must be set!`)
      }
    }

    this.nodeDefinition = new NodeDefinition({
      confDir: this.confDir
    })

    this.nodeMapping = new NodeMapping({
      confDir: this.confDir
    })

    this.tokenBuffer = new TokenBuffer()
  }

  /**
   * Resets the read position, stores the source and (unless skipped)
   * tokenizes it into the token buffer.
   *
   * NOTE(review): the token buffer itself is not cleared here, so tokens of
   * a previous parse() call stay in the buffer — confirm this is intended.
   *
   * @param {string} source Source code to tokenize.
   * @param {boolean} [skipBuffer] When truthy, only the state is reset.
   */
  parse (source, skipBuffer) {
    this.index = 0
    this.line = 1
    this.column = 1
    this.indention = 0
    this.source = source
    this.length = source.length
    this.lineEnd = 0
    this.columnEnd = 0

    this.parserFuncs = this.createMatcher(this.matcherConf)
    if (!skipBuffer) {
      this.fillBuffer()
    }
  }

  /**
   * Tokenizes `source` when given; otherwise fills the buffer from the
   * current state if it is empty.
   *
   * @param {string} [source]
   * @returns {object} The token buffer.
   */
  tokenize (source) {
    if (source) {
      this.parse(source)
    } else if (this.tokenBuffer.length === 0) {
      return this.fillBuffer()
    }

    return this.tokenBuffer
  }

  /**
   * Removes and returns the first token of the buffer.
   *
   * @returns {object|undefined} The token, or undefined if the buffer is empty.
   */
  nextToken () {
    return this.tokenBuffer.shift()
  }

  /**
   * Resolves the next node and applies node mappings to it.
   *
   * @param {*} scope Passed through to the definition and mapping resolvers.
   * @returns {object|null} The (possibly wrapped) node, or null at buffer end.
   */
  nextNode (scope) {
    const node = this.resolveToken(scope)
    if (!node) {
      return null
    }

    return this.resolveMapping(node, scope)
  }

  /**
   * Resolves the next node without applying node mappings.
   *
   * @param {*} scope
   * @returns {object|null}
   */
  nextRealNode (scope) {
    return this.resolveToken(scope)
  }

  /**
   * Drops the first token of the buffer.
   *
   * @returns {object|undefined} The dropped token.
   */
  skipNext () {
    return this.tokenBuffer.shift()
  }

  /**
   * Asks the node definition which node name matches the buffered tokens.
   *
   * @param {*} scope
   * @returns {*} Whatever `nodeDefinition.resolve()` returns; a falsy value
   *   is treated as "no match" by resolveToken().
   */
  resolveNodeName (scope) {
    return this.nodeDefinition.resolve(this.tokenBuffer, scope)
  }

  /**
   * Creates the node matching the upcoming tokens.
   *
   * @param {*} scope
   * @returns {object|null} The node, or null if the buffer is empty.
   * @throws {SyntaxError} If tokens are left but no node definition matches.
   */
  resolveToken (scope) {
    const nodeName = this.resolveNodeName(scope)

    if (!nodeName) {
      if (this.tokenBuffer.length === 0) {
        return null
      }

      if (this.tokenBuffer[0].type === 'indention') {
        this.syntaxError('Unhandeled indention detected!')
      }

      this.syntaxError('Unexpected token')
    }

    return this.createNode(nodeName, null, scope)
  }

  /**
   * Returns the first token of the buffer without removing it.
   *
   * @returns {object|undefined}
   */
  showNextToken () {
    return this.tokenBuffer[0]
  }

  /**
   * Loads the node class `<confDir>/nodes/<nodeName>.js` and instantiates it.
   *
   * @param {string} nodeName
   * @param {object|null} childNode Passed as second constructor argument.
   * @returns {object} The new node instance.
   */
  createNode (nodeName, childNode) {
    const Node = require(`${this.confDir}/nodes/${nodeName}.js`)
    return new Node(this, childNode)
  }

  /**
   * Repeatedly wraps `node` into mapped nodes for as long as the node
   * mapping yields a name.
   *
   * The special names `$origin` and `$last` stop the loop and return the
   * original node or the most recently created wrapper respectively.
   *
   * @param {object} node
   * @param {*} scope
   * @returns {object}
   */
  resolveMapping (node, scope) {
    let mapNode = node

    while (true) {
      const mapNodeName = this.nodeMapping.resolve(mapNode, this.tokenBuffer, scope)

      if (!mapNodeName) {
        return mapNode
      }

      if (mapNodeName === '$origin') {
        return node
      }

      if (mapNodeName === '$last') {
        return mapNode
      }

      const MapNode = require(`${this.confDir}/nodes/${mapNodeName}.js`)
      mapNode = new MapNode(this, mapNode)
    }
  }

  /**
   * Builds one matcher function per configuration item.
   *
   * Items with a `begin` property become range matchers (`begin` … `end`,
   * optional `escape` and nested `matcher`), all others become single
   * pattern matchers (`pattern`, optional nested `matcher`). Each matcher
   * receives the parser, pushes the tokens it recognizes at `parser.index`
   * and returns whether it matched.
   *
   * @param {object[]} arr Matcher configuration items.
   * @returns {Function[]}
   */
  createMatcher (arr) {
    function makeRangeMatcher (item) {
      return function matchRange (self) {
        const begin = new RegExp(item.begin.source || item.begin, 'y')
        begin.lastIndex = self.index
        const end = new RegExp(item.end.source || item.end, 'g')

        if (!begin.test(self.source)) {
          return false
        }

        let nextIndex = begin.lastIndex

        if (item.matcher) {
          // The opening delimiter becomes its own token, then the nested
          // matchers consume the range content.
          const opening = self.source.slice(self.index, nextIndex)
          self.tokenBuffer.push(self.createToken(item.type, opening, nextIndex))

          self.fillBuffer(self.createMatcher(item.matcher))
          nextIndex = self.index
        }

        end.lastIndex = nextIndex
        let value
        while (true) {
          // A failed global test() means there is no end delimiter left.
          if (!end.test(self.source)) {
            throw new Error('Unexpected EOF reached')
          }

          nextIndex = end.lastIndex
          // NOTE(review): this inspects the last character of the end match
          // itself, not the character in front of it — confirm this is how
          // `escape` is meant to work with the configured end patterns.
          if (self.source.charAt(nextIndex - 1) === item.escape) {
            continue
          }

          value = self.source.slice(self.index, nextIndex)
          break
        }

        self.tokenBuffer.push(self.createToken(item.type, value, nextIndex))
        return true
      }
    }

    function makePatternMatcher (item) {
      return function matchPattern (self) {
        const reg = new RegExp(item.pattern.source, 'y')
        reg.lastIndex = self.index
        const match = reg.exec(self.source)

        if (!match) {
          return false
        }

        self.tokenBuffer.push(self.createToken(item.type, match[0], reg.lastIndex))

        if (item.matcher) {
          self.fillBuffer(self.createMatcher(item.matcher))
        }

        return true
      }
    }

    return arr.map((item) => {
      return item.begin ? makeRangeMatcher(item) : makePatternMatcher(item)
    })
  }

  /**
   * Skips spaces and tabs starting at `index` and advances the column
   * counter accordingly.
   *
   * @param {number} index
   * @returns {number} Index of the first character that is no space or tab.
   */
  moveToNextItem (index) {
    const reg = /( |\t)+/y
    reg.lastIndex = index

    if (!reg.exec(this.source)) {
      return index
    }

    this.column += (reg.lastIndex - index)
    return reg.lastIndex
  }

  /**
   * Creates a token object and advances the parser position past it.
   *
   * For `indention` tokens the value is converted into the indention level
   * (line width divided by `indentionSize`) and the position fields describe
   * the indention on the new line. For `literal`, `comment` and `template`
   * tokens embedded line breaks are taken into account.
   *
   * @param {string} type Token type.
   * @param {string} value Matched source text.
   * @param {number} nextIndex Source index right behind the matched text.
   * @returns {object} Token with type, value, index, length, line, lineEnd,
   *   column, columnEnd, isKeyword and indention.
   * @throws {SyntaxError} If an indention is no multiple of `indentionSize`.
   */
  createToken (type, value, nextIndex) {
    let index = this.index
    let line = this.line
    let lineEnd = this.line
    let column = this.column
    let length = value.length
    let columnEnd = column + length - 1
    let isKeyword = false
    const tokenIndention = this.indention

    this.column += length

    if (type === 'identifier') {
      isKeyword = this.keyWords.includes(value)
    }

    if (type === 'indention') {
      const lines = value.split('\n')
      const lastLine = lines.pop()
      length = lastLine.length

      // After pop() `lines.length` equals the number of line breaks.
      this.line += lines.length
      line += lines.length
      this.column = lastLine.length + 1
      column = 1
      index += value.length - lastLine.length
      this.index = nextIndex

      // The token covers only the whitespace on the new line, so its end
      // position must be on that line too.
      lineEnd = line
      columnEnd = length

      if (this.indentionSize) {
        if (length % this.indentionSize) {
          this.syntaxError('Unexpected indention')
        }

        // The remainder is zero here, so the division is exact.
        value = length / this.indentionSize
        this.indention = value
      }
    } else if (['literal', 'comment', 'template'].includes(type)) {
      const lines = value.split('\n')
      const lineBreaks = lines.length - 1

      if (lineBreaks > 0) {
        const lastLine = lines[lineBreaks]
        this.line += lineBreaks
        lineEnd += lineBreaks
        columnEnd = lastLine.length
        // The next token starts right behind the last line of this one.
        this.column = lastLine.length + 1
      }
    }

    if (type !== 'indention') {
      this.index = this.moveToNextItem(nextIndex)
    }

    return {
      type: type,
      value: value,
      index: index,
      length: length,
      line: line,
      lineEnd: lineEnd,
      column: column,
      columnEnd: columnEnd,
      isKeyword: isKeyword,
      indention: tokenIndention
    }
  }

  /**
   * Renders up to three source lines ending at the token's line, followed
   * by a caret line pointing at the token's column.
   *
   * @param {object} [token] Anything with `line` and `column`; defaults to
   *   the parser's current position.
   * @returns {string}
   */
  sourcePreview (token) {
    token = token || this
    const startLine = Math.max(0, token.line - 3)
    const endLine = Math.max(0, token.line)
    const numWidth = String(endLine).length
    const previewArr = this.source.split('\n').slice(startLine, endLine)

    const lines = previewArr.map((line, index) => {
      const lineNum = ` ${startLine + index + 1}`.slice(-numWidth)
      return `${lineNum} | ${line}\n`
    })

    // Line number width plus ' | ' shifts the source by numWidth + 3 chars;
    // columns are 1-based, hence the offset of numWidth + 2.
    return lines.join('').concat(`${' '.repeat(token.column + numWidth + 2)}^\n`)
  }

  /**
   * Throws a SyntaxError carrying position information and a source preview.
   *
   * @param {string} msg
   * @param {object} [token] Defaults to the first buffered token or, if the
   *   buffer is empty, the parser's current position.
   * @throws {SyntaxError} Always.
   */
  syntaxError (msg, token) {
    if (!token) {
      token = this.tokenBuffer[0] || this
    }

    const errorFile = this.filename ? ` in file ${this.filename}` : ''
    throw new SyntaxError(`${msg} at line ${token.line} in column ${token.column}${errorFile}\n${this.sourcePreview(token)}`)
  }

  /**
   * Consumes the next token and ensures it has the given type.
   *
   * @param {string} type Expected token type.
   * @param {string} msg Error message used when the expectation fails.
   * @returns {object} The consumed token.
   * @throws {SyntaxError} If the buffer is empty or the type differs.
   */
  expectToken (type, msg) {
    const token = this.nextToken()

    if (token && token.type === type) {
      return token
    }

    this.syntaxError(msg, token)
  }

  /**
   * Consumes and returns the next token, which must be an identifier.
   *
   * @returns {object}
   * @throws {SyntaxError}
   */
  getIdentifier () {
    return this.expectToken('identifier', 'Identifier token expected')
  }

  /**
   * Consumes the next identifier token and returns its value.
   *
   * @returns {string}
   * @throws {SyntaxError}
   */
  getIdentifierValue () {
    return this.getIdentifier().value
  }

  /**
   * Consumes the next token, which must be an identifier listed in
   * `keyWords`, and retypes it to `keyword`.
   *
   * @param {string} [value] NOTE(review): currently unused — possibly meant
   *   to require one specific keyword; confirm with callers before using it.
   * @returns {object}
   * @throws {SyntaxError}
   */
  getKeyword (value) {
    const token = this.nextToken()

    if (token && token.type === 'identifier' && this.keyWords.includes(token.value)) {
      token.type = 'keyword'
      return token
    }

    this.syntaxError('Keyword token expected', token)
  }

  /**
   * Consumes and returns the next token, which must be a literal.
   *
   * @returns {object}
   * @throws {SyntaxError}
   */
  getLiteral () {
    return this.expectToken('literal', 'Literal token expected')
  }

  /**
   * Consumes and returns the next token, which must be a punctuator.
   *
   * @returns {object}
   * @throws {SyntaxError}
   */
  getPunctuator () {
    return this.expectToken('punctuator', 'Punctuator token expected')
  }

  /**
   * Consumes and returns the next token, which must be an operator.
   *
   * @returns {object}
   * @throws {SyntaxError}
   */
  getOperator () {
    return this.expectToken('operator', 'Operator token expected')
  }

  /**
   * Consumes and returns the next token, which must be a comment.
   *
   * @returns {object}
   * @throws {SyntaxError}
   */
  getComment () {
    return this.expectToken('comment', 'Comment token expected')
  }

  /**
   * Tests the buffered tokens against a match string.
   *
   * @param {string} matchString Definition understood by
   *   `nodeDefinition.parse()`, e.g. `'indention'` or `'punctuator ","'`.
   * @returns {*} Result of the parsed definition's `test()`.
   */
  match (matchString) {
    const matchDefinition = this.nodeDefinition.parse(matchString)
    return matchDefinition.test(this.tokenBuffer)
  }

  /**
   * Runs the matchers against the source until none of them matches or the
   * end of the source is reached.
   *
   * @param {Function[]} [subMatch] Nested matchers; defaults to the
   *   top-level matchers created by parse().
   * @returns {object} The token buffer.
   * @throws {SyntaxError} If the top-level matchers stop before the end of
   *   the source.
   * @throws {Error} If a matcher succeeds without moving the read position
   *   twice in a row.
   */
  fillBuffer (subMatch) {
    // Must be captured before the default is applied; nested matchers are
    // allowed to stop early, the top-level ones are not.
    const isTopLevel = !subMatch
    const matchers = subMatch || this.parserFuncs
    let lastIndex = null

    while (true) {
      const res = matchers.find((fn) => {
        return fn(this)
      })

      if (!res) {
        if (isTopLevel && this.index < this.length) {
          throw new SyntaxError(`Unexpected token at line ${this.line} in column ${this.column} \n\n${this.sourcePreview()}`)
        }

        break
      }

      if (this.index === this.length) {
        break
      }

      if (lastIndex === this.index) {
        throw new Error(`Parser stucks in a loop at index ${this.index}:${this.length}! at line ${this.line} in column ${this.column} \n\n${this.sourcePreview()}`)
      }

      lastIndex = this.index
    }

    return this.tokenBuffer
  }

  /**
   * Returns position information of the first buffered token, or of the
   * parser itself if the buffer is empty.
   *
   * NOTE(review): tokens built by createToken() carry no `lineLength`
   * property, so that field is undefined for them — confirm whether
   * `lineEnd` was meant.
   *
   * @returns {{index: number, length: number, line: number, lineLength: *, column: number, indention: number}}
   */
  getPosition () {
    const token = this.tokenBuffer[0] || this

    return {
      index: token.index,
      length: token.length,
      line: token.line,
      lineLength: token.lineLength,
      column: token.column,
      indention: token.indention
    }
  }

  /**
   * Tells whether the next token is an indention deeper than the parent's.
   *
   * @param {number} [parentIndention] Defaults to `tokenBuffer.getIndention()`.
   * @returns {boolean}
   */
  isInnerScope (parentIndention) {
    parentIndention = Number.isInteger(parentIndention) ? parentIndention : this.tokenBuffer.getIndention()

    const token = this.tokenBuffer[0]
    if (!token || token.type !== 'indention') {
      return false
    }

    return parentIndention < token.value
  }

  /**
   * Tells whether the next token is an indention shallower than the parent's.
   *
   * @param {number} [parentIndention] Defaults to `tokenBuffer.getIndention()`.
   * @returns {boolean}
   */
  isOuterScope (parentIndention) {
    parentIndention = Number.isInteger(parentIndention) ? parentIndention : this.tokenBuffer.getIndention()

    const token = this.tokenBuffer[0]
    if (!token || token.type !== 'indention') {
      return false
    }

    return parentIndention > token.value
  }

  /**
   * Tells whether the next token is an indention equal to the parent's.
   *
   * @param {number} [parentIndention] Defaults to `tokenBuffer.getIndention()`.
   * @returns {boolean}
   */
  isSameScope (parentIndention) {
    parentIndention = Number.isInteger(parentIndention) ? parentIndention : this.tokenBuffer.getIndention()

    const token = this.tokenBuffer[0]
    if (!token || token.type !== 'indention') {
      return false
    }

    return parentIndention === token.value
  }

  /**
   * Tells whether the token buffer is empty.
   *
   * @returns {boolean}
   */
  isEOF () {
    return this.tokenBuffer.length === 0
  }

  /**
   * Debug helper: logs an optional message and the first five tokens.
   *
   * @param {string} [msg]
   */
  print (msg) {
    if (msg) {
      console.log(msg)
    }

    console.log(this.tokenBuffer.slice(0, 5))
  }

  /**
   * Debug helper: logs an optional message and a summary of the next token.
   *
   * @param {string} [msg]
   */
  printNext (msg) {
    if (msg) {
      console.log(msg)
    }

    const token = this.showNextToken()
    if (!token) {
      console.log('> buffer end!!')
      return
    }

    console.log(`> ${token.type} ${token.value}, ind ${token.indention} buffer len ${this.tokenBuffer.length}`)
  }

  /**
   * Opens a scope at the current position and returns an iterable that
   * yields the parser once per scope item.
   *
   * A scope is either delimited by a bracket pair (`{`, `[` or `(` with the
   * closing counterpart taken from `scopeDelimiter`), by an indention
   * level, or — if neither a bracket nor an indention follows — inline,
   * ending at the next indention token. Separators (indention tokens and,
   * in bracket scopes, commas) are consumed between the items.
   *
   * @returns {Iterable<Parser>}
   * @throws {SyntaxError} On unexpected indention or a missing scope end.
   */
  walkScope () {
    let scopeIndention = this.tokenBuffer.getIndention()
    let scopeEnd = null
    let isInlineScope = true

    if (this.match('punctuator [{,[,(]')) {
      const token = this.nextToken()
      scopeEnd = this.scopeDelimiter[token.value]
      // In a bracket scope the first indention seen defines the level.
      scopeIndention = null
      isInlineScope = false
    }

    if (this.match('indention')) {
      const token = this.nextToken()
      scopeIndention = token.value
      isInlineScope = false
    }

    const next = () => {
      if (this.match('indention')) {
        if (isInlineScope) {
          return { done: true, value: this }
        }

        const token = this.showNextToken()
        if (scopeIndention === null) {
          scopeIndention = token.value
        }

        if (token.value === scopeIndention) {
          this.skipNext()
          if (scopeEnd && this.match(`punctuator "${scopeEnd}"`)) {
            this.skipNext()
            return { done: true, value: this }
          }

          return { done: this.isEOF(), value: this }
        }

        if (!scopeEnd && token.value > scopeIndention) {
          this.syntaxError('Indention error!')
        }

        // A shallower indention in an indention scope simply ends the scope;
        // the indention token stays in the buffer.
        if (scopeEnd) {
          this.skipNext()
          if (this.match(`punctuator "${scopeEnd}"`)) {
            this.skipNext()
          } else {
            this.syntaxError('Unexpected scope end or invalid indention!')
          }
        }

        return { done: true, value: this }
      }

      if (scopeEnd && this.match('punctuator ","')) {
        this.skipNext()
        if (this.match('indention')) {
          const token = this.showNextToken()
          if (scopeIndention === null) {
            scopeIndention = token.value
            this.skipNext()
          } else if (token.value === scopeIndention) {
            this.skipNext()
          } else {
            this.syntaxError('Indention error!')
          }
        }
      } else if (scopeEnd && this.match(`punctuator "${scopeEnd}"`)) {
        this.skipNext()
        return { done: true, value: this }
      }

      return { done: this.isEOF(), value: this }
    }

    return {
      [ Symbol.iterator ]: () => {
        return { next: next }
      }
    }
  }

  /**
   * Overwrites the type of the first buffered token.
   *
   * @param {string} tokenType
   */
  swapToken (tokenType) {
    this.tokenBuffer[0].type = tokenType
  }
}
|
725 |
|
726 | module.exports = Parser
|