UNPKG

10.6 kB · JavaScript · View Raw
1import Chunk from './chunk';
2import Rule from './rule';
3import Grammar from './grammar';
4
/**
 * Builds a Rule from a fully loaded chunk.
 *
 * Calls `chunk.prune()` first, then wraps the result of
 * `chunk.getExpression()` in a new Rule together with the rule name and the
 * raw expression text it was read from.
 *
 * @param {string} name
 * @param {Chunk} chunk
 * @param {string} originalExpressionText
 * @return {Rule}
 */
function createRule(name, chunk, originalExpressionText) {
  // prune() must run before getExpression(): the original code relies on
  // this order, so it is kept as two separate statements.
  chunk.prune();
  const expression = chunk.getExpression();
  return new Rule(name, expression, originalExpressionText);
}
16
/**
 * Adds the identifier accumulated in `buffer` (if any) to `parentChunk` as a
 * RULE chunk, then empties the buffer.
 *
 * @param {Chunk} parentChunk
 * @param {string[]} buffer characters collected since the last token
 */
function flushPendingRule(parentChunk, buffer) {
  const ruleName = buffer.join('').trim();
  if (ruleName.length > 0) {
    parentChunk.addChunk(new Chunk(Chunk.ChunkType.RULE, ruleName));
  }
  buffer.length = 0;
}

/**
 * Creates the child chunk started by an opening delimiter and reports which
 * character closes it.
 *
 * @param {string} opener one of `"`, `'`, `(`, `[`, `{`
 * @return {{chunk: Chunk, closer: string}}
 */
function openNestedChunk(opener) {
  switch (opener) {
    case '(':
      return { chunk: new Chunk(Chunk.ChunkType.GROUP), closer: ')' };
    case '[':
      return { chunk: new Chunk(Chunk.ChunkType.OPTION), closer: ']' };
    case '{': {
      const repetition = new Chunk(Chunk.ChunkType.REPETITION);
      repetition.setMinCount(0);
      return { chunk: repetition, closer: '}' };
    }
    default:
      // Quote characters: a literal is closed by the same quote that opened it.
      return { chunk: new Chunk(Chunk.ChunkType.LITERAL), closer: opener };
  }
}

/**
 * Reads characters from `readNext` and fills `parentChunk` until `stopChar`
 * (or the end of input, signalled by `readNext` returning -1) is reached.
 *
 * - For a LITERAL parent, every character up to `stopChar` becomes its text.
 * - For a GROUP parent whose first character is `*` or `?`, the content is
 *   read up to the matching `*)` / `?)`; the parent is then turned into a
 *   COMMENT / SPECIAL_SEQUENCE chunk holding the trimmed inner text.
 * - Otherwise identifiers become RULE chunks, `|` an ALTERNATION chunk,
 *   `*`, `+`, `?` REPETITION_TOKEN chunks, and quotes / brackets open nested
 *   chunks that are loaded recursively.
 *
 * @param {Chunk} parentChunk
 * @param {function(): string} readNext
 * @param {string} stopChar
 * @return {string} every character consumed by this call (nested calls
 *     included), with the closing delimiter when one was found
 */
function loadExpression(parentChunk, readNext, stopChar) {
  const EOF = -1;
  const consumedText = [];
  const buffer = [];
  const isLiteral = parentChunk.getType() === Chunk.ChunkType.LITERAL;
  let isFirst = true;
  // '*' or '?' once we know we are inside "(* ... *)" or "(? ... ?)".
  let specialGroupChar = null;
  let lastChar = null;

  for (let c = readNext(); c !== EOF; c = readNext()) {
    consumedText.push(c);

    if (isLiteral) {
      if (c === stopChar) {
        parentChunk.setText(buffer.join(''));
        return consumedText.join('');
      }
      buffer.push(c);
      continue;
    }

    // Only the very first character of a group can mark it as special.
    if (isFirst
        && parentChunk.getType() === Chunk.ChunkType.GROUP
        && (c === '*' || c === '?')) {
      specialGroupChar = c;
    }
    isFirst = false;

    if (specialGroupChar !== null) {
      if (c === ')' && lastChar === specialGroupChar) {
        // Mutate parent group
        parentChunk.setType(specialGroupChar === '*'
          ? Chunk.ChunkType.COMMENT
          : Chunk.ChunkType.SPECIAL_SEQUENCE);
        // The buffer starts and ends with the marker character; drop both.
        const raw = buffer.join('');
        parentChunk.setText(raw.slice(1, raw.length - 1).trim());
        return consumedText.join('');
      }
      if (buffer.length > 0 || !/\s/.test(c)) {
        buffer.push(c);
      }
      lastChar = c;
      continue;
    }

    if (c === stopChar) {
      flushPendingRule(parentChunk, buffer);
      return consumedText.join('');
    }

    switch (c) {
      case ',':
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        // Separators only end the current identifier.
        flushPendingRule(parentChunk, buffer);
        break;
      case '|':
        flushPendingRule(parentChunk, buffer);
        parentChunk.addChunk(new Chunk(Chunk.ChunkType.ALTERNATION));
        break;
      case '*':
      case '+':
      case '?':
        flushPendingRule(parentChunk, buffer);
        parentChunk.addChunk(new Chunk(Chunk.ChunkType.REPETITION_TOKEN, c));
        break;
      case '"':
      case '\'':
      case '(':
      case '[':
      case '{': {
        flushPendingRule(parentChunk, buffer);
        const { chunk, closer } = openNestedChunk(c);
        consumedText.push(loadExpression(chunk, readNext, closer));
        parentChunk.addChunk(chunk);
        break;
      }
      default:
        // Skip leading whitespace; anything else is part of an identifier.
        if (buffer.length > 0 || !/\s/.test(c)) {
          buffer.push(c);
        }
        break;
    }
    lastChar = c;
  }
  return consumedText.join('');
}
185
186
export default class BNFToGrammar {

  /**
   * Parses BNF/EBNF-style text into a Grammar.
   *
   * Text before each `=` is the rule name (one or two trailing `:` are
   * dropped, so `::=` and `:=` are accepted); the expression that follows is
   * loaded up to the next `;`. A `(` in rule-name position must start a
   * `(* ... *)` comment, which is skipped.
   *
   * @param {string} text
   * @return {Grammar}
   * @throws {string} when a `(` in rule-name position is not followed by `*`
   */
  convert(text) {
    const EOF = -1;
    let index = 0;
    // Hands out one character per call, then EOF once the text is exhausted.
    const readNext = () => (index < text.length ? text[index++] : EOF);

    const nameBuffer = [];
    const ruleList = [];
    for (let c = readNext(); c !== EOF; c = readNext()) {
      switch (c) {
        case '=': {
          const chunk = new Chunk(Chunk.ChunkType.GROUP);
          let expressionText = loadExpression(chunk, readNext, ';');
          if (expressionText.endsWith(';')) {
            expressionText = expressionText.slice(0, -1);
          }
          // Strip the ':' / '::' of ':=' / '::=' before trimming the name.
          const ruleName = nameBuffer.join('').replace(/:{1,2}$/, '').trim();
          nameBuffer.length = 0;
          ruleList.push(createRule(ruleName, chunk, expressionText));
          break;
        }
        // Consider that '(' in rule name is start of a comment.
        case '(': {
          if (readNext() !== '*') {
            // A string is thrown (not an Error) to keep what existing
            // callers catch unchanged.
            throw "Expecting start of a comment after '(' but could not find '*'!";
          }
          let lastChar = null;
          for (let c2 = readNext(); c2 !== EOF; c2 = readNext()) {
            if (c2 === ')' && lastChar === '*') {
              break;
            }
            lastChar = c2;
          }
          break;
        }
        default: {
          // Ignore whitespace that precedes the rule name.
          if (!/\s/.test(c) || nameBuffer.length > 0) {
            nameBuffer.push(c);
          }
          break;
        }
      }
    }
    return new Grammar(ruleList);
  }

}