1 | import Chunk from './chunk';
|
2 | import Rule from './rule';
|
3 | import Grammar from './grammar';
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
/**
 * Builds a {@link Rule} from a parsed chunk tree.
 *
 * The chunk is pruned in place first, and only afterwards asked for its
 * expression, so the expression reflects the pruned tree.
 *
 * @param {string} name - Name of the rule being defined.
 * @param {Chunk} chunk - Root chunk holding the parsed right-hand side.
 * @param {string} originalExpressionText - Raw source text of the right-hand side.
 * @returns {Rule} The newly constructed rule.
 */
function createRule(name, chunk, originalExpressionText) {
  chunk.prune();
  return new Rule(name, chunk.getExpression(), originalExpressionText);
}
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
/**
 * Reads characters from `readNext` and populates `parentChunk` until the
 * matching `stopChar` is consumed or the input ends.
 *
 * The function works in one of three modes, decided by `parentChunk`:
 *  - LITERAL parent: every character up to `stopChar` becomes the chunk text.
 *  - GROUP parent whose very first character is `*` or `?`: the content is
 *    collected verbatim until `*)` / `?)`; the chunk is then retyped to
 *    COMMENT / SPECIAL_SEQUENCE and receives the trimmed inner text.
 *  - Anything else: identifiers become RULE chunks, `|` becomes ALTERNATION,
 *    `*`, `+`, `?` become REPETITION_TOKEN chunks, and quotes / `(` / `[` / `{`
 *    start nested LITERAL / GROUP / OPTION / REPETITION chunks that are parsed
 *    recursively.
 *
 * If the input ends before `stopChar` is seen while parsing in the third mode,
 * an identifier that is still buffered is emitted as a RULE chunk instead of
 * being dropped. Unterminated literals and comments are left untouched.
 *
 * @param {Chunk} parentChunk - Chunk that receives the parsed content.
 * @param {function(): (string|number)} readNext - Returns the next character,
 *     or -1 once the input is exhausted.
 * @param {string} stopChar - Character that terminates this expression.
 * @returns {string} The raw text consumed by this call, including the
 *     terminator when one was found and the text of nested constructs.
 */
function loadExpression(parentChunk, readNext, stopChar) {
  const consumedText = [];
  const pendingChars = [];
  const isLiteral = parentChunk.getType() === Chunk.ChunkType.LITERAL;
  let isFirst = true;
  let specialGroupChar = null; // '*' or '?' once a comment / special sequence is detected
  let previousChar = null;

  // Emits the buffered identifier (if any) as a RULE chunk and clears the buffer.
  const flushPendingRule = () => {
    const ruleName = pendingChars.join('').trim();
    if (ruleName.length > 0) {
      parentChunk.addChunk(new Chunk(Chunk.ChunkType.RULE, ruleName));
    }
    pendingChars.length = 0;
  };

  // Parses a nested construct into `nestedChunk` and attaches it to the parent.
  const loadNested = (nestedChunk, closingChar) => {
    flushPendingRule();
    consumedText.push(loadExpression(nestedChunk, readNext, closingChar));
    parentChunk.addChunk(nestedChunk);
  };

  for (let c = readNext(); c !== -1; c = readNext()) {
    consumedText.push(c);

    if (isLiteral) {
      if (c === stopChar) {
        parentChunk.setText(pendingChars.join(''));
        return consumedText.join('');
      }
      pendingChars.push(c);
      continue;
    }

    if (isFirst) {
      isFirst = false;
      // Only a group that opens directly with '*' or '?' is a comment or
      // special sequence; any leading whitespace disables this detection.
      if ((c === '*' || c === '?') && parentChunk.getType() === Chunk.ChunkType.GROUP) {
        specialGroupChar = c;
      }
    }

    if (specialGroupChar !== null) {
      if (c === ')' && previousChar === specialGroupChar) {
        parentChunk.setType(
          specialGroupChar === '*' ? Chunk.ChunkType.COMMENT : Chunk.ChunkType.SPECIAL_SEQUENCE,
        );
        // The buffer holds the opening and closing marker; strip both.
        const raw = pendingChars.join('');
        parentChunk.setText(raw.slice(1, raw.length - 1).trim());
        return consumedText.join('');
      }
      if (pendingChars.length > 0 || !/\s/.test(c)) {
        pendingChars.push(c);
      }
      previousChar = c;
      continue;
    }

    if (c === stopChar) {
      flushPendingRule();
      return consumedText.join('');
    }

    switch (c) {
      case ',':
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        flushPendingRule();
        break;
      case '|':
        flushPendingRule();
        parentChunk.addChunk(new Chunk(Chunk.ChunkType.ALTERNATION));
        break;
      case '*':
      case '+':
      case '?':
        flushPendingRule();
        parentChunk.addChunk(new Chunk(Chunk.ChunkType.REPETITION_TOKEN, c));
        break;
      case '"':
      case '\'':
        // A literal is closed by the same quote character that opened it.
        loadNested(new Chunk(Chunk.ChunkType.LITERAL), c);
        break;
      case '(':
        loadNested(new Chunk(Chunk.ChunkType.GROUP), ')');
        break;
      case '[':
        loadNested(new Chunk(Chunk.ChunkType.OPTION), ']');
        break;
      case '{': {
        const repetitionChunk = new Chunk(Chunk.ChunkType.REPETITION);
        repetitionChunk.setMinCount(0);
        loadNested(repetitionChunk, '}');
        break;
      }
      default:
        // Skip leading whitespace, but keep it once an identifier has started.
        if (pendingChars.length > 0 || !/\s/.test(c)) {
          pendingChars.push(c);
        }
        break;
    }
    previousChar = c;
  }

  // Input ended without the terminator: keep the identifier that was still
  // being read rather than silently losing it.
  if (!isLiteral && specialGroupChar === null) {
    flushPendingRule();
  }
  return consumedText.join('');
}
|
185 |
|
186 |
|
/**
 * Converts the textual form of a BNF-style grammar into a {@link Grammar}.
 */
export default class BNFToGrammar {
  /**
   * Parses `text` into a grammar, one rule per `name = expression ;` entry.
   *
   * Everything read before an `=` is taken as the rule name; up to two
   * trailing `:` are removed from it, so `:=` and `::=` are accepted as well.
   * The right-hand side is parsed by `loadExpression` up to the next `;`.
   * A `(* ... *)` block appearing outside a right-hand side is skipped.
   *
   * @param {string} text - Grammar source text.
   * @returns {Grammar} Grammar holding the rules in the order they were read.
   * @throws {string} A message string (not an Error object) when a `(`
   *     outside a right-hand side is not immediately followed by `*`.
   */
  convert(text) {
    let position = 0;
    // Character reader shared with loadExpression; yields -1 at end of input.
    const readNext = () => {
      if (position >= text.length) {
        return -1;
      }
      const char = text[position];
      position += 1;
      return char;
    };

    const nameChars = [];
    const rules = [];
    let c;
    while ((c = readNext()) !== -1) {
      if (c === '=') {
        const chunk = new Chunk(Chunk.ChunkType.GROUP);
        const rawText = loadExpression(chunk, readNext, ';');
        // Drop the terminating ';' from the recorded source text.
        const expressionText = rawText.endsWith(";") ? rawText.slice(0, -1) : rawText;

        let ruleName = nameChars.join("");
        nameChars.length = 0;
        // Remove at most two trailing colons (':=' and '::=' assignment forms).
        for (let removed = 0; removed < 2 && ruleName.endsWith(":"); removed += 1) {
          ruleName = ruleName.slice(0, -1);
        }
        rules.push(createRule(ruleName.trim(), chunk, expressionText));
      } else if (c === '(') {
        if (readNext() !== '*') {
          throw "Expecting start of a comment after '(' but could not find '*'!";
        }
        // Consume up to and including the closing "*)", or to end of input.
        let previous = 0;
        let c2 = readNext();
        while (c2 !== -1 && !(c2 === ')' && previous === '*')) {
          previous = c2;
          c2 = readNext();
        }
      } else if (nameChars.length > 0 || !/\s/.test(c)) {
        // Leading whitespace before a rule name is ignored.
        nameChars.push(c);
      }
    }
    return new Grammar(rules);
  }
}
|