1 | import { FSM, makeTransition } from "@webassemblyjs/helper-fsm";
|
2 | import { codeFrameFromSource } from "@webassemblyjs/helper-code-frame";
|
3 |
|
4 |
|
/**
 * Renders a code frame pointing at a position in the source, padded with a
 * newline on each side so it can be prepended to an error message.
 *
 * @param {string} source - Full source text being tokenized.
 * @param {number} line - Line of the position to highlight.
 * @param {number} column - Column of the position to highlight.
 * @returns {string} The frame produced by `codeFrameFromSource`, wrapped in "\n".
 */
function getCodeFrame(source, line, column) {
  const start = { line: line, column: column };
  const frame = codeFrameFromSource(source, { start: start });

  return "\n" + frame + "\n";
}
|
14 |
|
// Single-character classifiers used by the tokenizer's main loop.
const WHITESPACE = /\s/;
const PARENS = /\(|\)/;

// Characters allowed in bare words (keywords, valtypes, instruction names).
const LETTERS = /[a-z0-9_/]/i;

// Characters allowed after the leading "$" of an identifier.
const idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;

const valtypes = ["i32", "i64", "f32", "f64"];

// Characters that may start a number literal. Inside a character class "|"
// is a literal pipe, not alternation, so it must not appear here; otherwise
// "|" would be classified as the start of a number.
const NUMBERS = /[0-9._]/;

// Tested against the next three (lower-cased) characters of the input.
const NUMBER_KEYWORDS = /nan|inf/;
|
22 |
|
/**
 * Tells whether a string starts with a line feed (code 10) or a carriage
 * return (code 13).
 *
 * @param {string} _char - String whose first character is inspected.
 * @returns {boolean} True when the first character is "\n" or "\r".
 */
function isNewLine(_char) {
  const LINE_FEED = 10;
  const CARRIAGE_RETURN = 13;
  const code = _char.charCodeAt(0);

  if (code === LINE_FEED) {
    return true;
  }

  return code === CARRIAGE_RETURN;
}
|
26 |
|
/**
 * Builds a token record.
 *
 * @param {string} type - Token type.
 * @param {*} value - Token value.
 * @param {Object} start - Start position, stored as `loc.start`.
 * @param {Object} end - End position, stored as `loc.end`.
 * @param {Object} [opts] - Extra attributes; attached as `opts` only when it
 *   has at least one own enumerable key.
 * @returns {Object} `{ type, value, loc: { start, end } }`, plus `opts` when non-empty.
 */
function Token(type, value, start, end, opts = {}) {
  const token = {
    type,
    value,
    loc: { start, end }
  };

  if (Object.keys(opts).length === 0) {
    return token;
  }

  token.opts = opts;
  return token;
}
|
45 |
|
// Token type names produced by the tokenizer. Each key maps to itself so
// that `tokenTypes.number === "number"`.
const tokenTypes = [
  "openParen",
  "closeParen",
  "number",
  "string",
  "name",
  "identifier",
  "valtype",
  "dot",
  "comment",
  "equal",
  "keyword"
].reduce(function (types, typeName) {
  types[typeName] = typeName;
  return types;
}, {});
|
// Words the tokenizer reports as `keyword` tokens. Each key maps to itself;
// the tokenizer checks membership with `typeof keywords[word] === "string"`.
export const keywords = [
  "module",
  "func",
  "param",
  "result",
  "export",
  "loop",
  "block",
  "if",
  "then",
  "else",
  "call",
  "call_indirect",
  "import",
  "memory",
  "shared",
  "table",
  "global",
  "anyfunc",
  "mut",
  "data",
  "type",
  "elem",
  "start",
  "offset"
].reduce(function (table, word) {
  table[word] = word;
  return table;
}, {});
|
// Character allowed between digits of a number literal (e.g. 1_000).
const NUMERIC_SEPARATOR = "_";

/**
 * State machine that recognises a number literal at the start of its input:
 * optional sign, then `nan`, `inf`, `nan:0x<hex>`, a decimal number with
 * optional fraction/exponent, or a `0x` hexadecimal number with optional
 * fraction and `p` exponent.
 *
 * Fixes relative to the previous table:
 *  - The hex-digit classes were written `[0-9|A-F|a-f]`; inside a character
 *    class "|" is a literal pipe, so "|" was accepted as a hex digit.
 *  - HEX_FRAC used `/p|P|/`, whose empty alternative matches any character,
 *    so a hex fraction consumed whatever character followed it (e.g. the
 *    closing paren in `0x1.8)`).
 *  - HEX_SIGNED_EXP used `[0-9|+|-]`, which likewise accepted a literal "|".
 *
 * NOTE(review): this assumes the FSM stops when no transition of the current
 * state matches, which the DEC/DEC_FRAC states already depend on — confirm
 * against @webassemblyjs/helper-fsm.
 */
const numberLiteralFSM = new FSM(
  {
    START: [
      makeTransition(/-|\+/, "AFTER_SIGN"),
      makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
      makeTransition(/nan|inf/, "STOP", { n: 3 }),
      makeTransition(/0x/, "HEX", { n: 2 }),
      makeTransition(/[0-9]/, "DEC"),
      makeTransition(/\./, "DEC_FRAC")
    ],
    AFTER_SIGN: [
      makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
      makeTransition(/nan|inf/, "STOP", { n: 3 }),
      makeTransition(/0x/, "HEX", { n: 2 }),
      makeTransition(/[0-9]/, "DEC"),
      makeTransition(/\./, "DEC_FRAC")
    ],
    DEC_FRAC: [
      makeTransition(/[0-9]/, "DEC_FRAC", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC: [
      makeTransition(/[0-9]/, "DEC", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/\./, "DEC_FRAC"),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC_SIGNED_EXP: [
      makeTransition(/\+|-/, "DEC_EXP"),
      makeTransition(/[0-9]/, "DEC_EXP")
    ],
    DEC_EXP: [
      makeTransition(/[0-9]/, "DEC_EXP", { allowedSeparator: NUMERIC_SEPARATOR })
    ],
    HEX: [
      makeTransition(/[0-9A-Fa-f]/, "HEX", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/\./, "HEX_FRAC"),
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_FRAC: [
      makeTransition(/[0-9A-Fa-f]/, "HEX_FRAC", { allowedSeparator: NUMERIC_SEPARATOR }),
      // Only an exponent marker may follow the fraction digits.
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_SIGNED_EXP: [
      // First exponent character: a digit or an explicit sign.
      makeTransition(/[0-9+-]/, "HEX_EXP")
    ],
    HEX_EXP: [
      makeTransition(/[0-9]/, "HEX_EXP", { allowedSeparator: NUMERIC_SEPARATOR })
    ],
    NAN_HEX: [
      makeTransition(/[0-9A-Fa-f]/, "NAN_HEX", { allowedSeparator: NUMERIC_SEPARATOR })
    ],
    STOP: []
  },
  "START",
  "STOP"
);
|
/**
 * Splits WebAssembly text-format source into a flat array of tokens.
 *
 * Recognised constructs, in the order they are tried: `;;` line comments,
 * `(; ;)` block comments, parentheses, `=`, newlines, other whitespace,
 * `$identifiers`, number literals, double-quoted strings, and bare words
 * (reported as keyword, valtype or name tokens, with `.`-separated parts
 * emitted as separate dot/name tokens).
 *
 * Changes relative to the previous implementation:
 *  - `$id` or `name.` at the very end of the input no longer loops forever
 *    (`regex.test(undefined)` tests the string "undefined", which matched).
 *  - `;;` at end of input, an unterminated block comment and an unterminated
 *    string now stop cleanly or raise a descriptive Error instead of a
 *    TypeError from reading past the end of the input.
 *  - An explicit start/end column of 0 is honoured instead of being replaced
 *    by the length-based fallback (which produced negative columns for
 *    numbers at the start of a line).
 *  - Each dot of a dotted name reports its own column rather than the column
 *    of the first dot.
 *
 * @param {string} input - Source text to tokenize.
 * @returns {Array<Object>} Tokens as built by `Token`: `{ type, value, loc }`
 *   plus `opts` for comment tokens.
 * @throws {Error} On an unexpected character, a newline inside a string, or
 *   an unterminated string / block comment. The message starts with a code
 *   frame for the offending position.
 */
export function tokenize(input) {
  let current = 0;
  let ch = input[current];

  // Column counter: starts at 1 on the first line and is reset to 0 after
  // each newline consumed by the main loop.
  let column = 1;
  let line = 1;
  const tokens = [];

  /**
   * Creates a function that pushes a token of the given type.
   * The returned function accepts the token value and an options object;
   * `startColumn`/`endColumn` are consumed here and every remaining option
   * is forwarded to `Token`.
   */
  function pushToken(type) {
    return function (v, opts = {}) {
      const length = String(v).length;

      // Compare against undefined rather than using `||`: 0 is a valid column.
      const startColumn =
        opts.startColumn !== undefined ? opts.startColumn : column - length;
      delete opts.startColumn;

      const endColumn =
        opts.endColumn !== undefined ? opts.endColumn : startColumn + length - 1;
      delete opts.endColumn;

      const start = { line: line, column: startColumn };
      const end = { line: line, column: endColumn };

      tokens.push(Token(type, v, start, end, opts));
    };
  }

  const pushCloseParenToken = pushToken(tokenTypes.closeParen);
  const pushOpenParenToken = pushToken(tokenTypes.openParen);
  const pushNumberToken = pushToken(tokenTypes.number);
  const pushValtypeToken = pushToken(tokenTypes.valtype);
  const pushNameToken = pushToken(tokenTypes.name);
  const pushIdentifierToken = pushToken(tokenTypes.identifier);
  const pushKeywordToken = pushToken(tokenTypes.keyword);
  const pushDotToken = pushToken(tokenTypes.dot);
  const pushStringToken = pushToken(tokenTypes.string);
  const pushCommentToken = pushToken(tokenTypes.comment);
  const pushEqualToken = pushToken(tokenTypes.equal);

  /**
   * Returns `length` lower-cased characters starting `offset` characters
   * after the current position, without consuming anything.
   */
  function lookahead(length = 1, offset = 1) {
    return input
      .substring(current + offset, current + offset + length)
      .toLowerCase();
  }

  /** Advances the cursor (and the column counter) by `amount` characters. */
  function eatCharacter(amount = 1) {
    column += amount;
    current += amount;
    ch = input[current];
  }

  /** Builds the error reported for the character under the cursor. */
  function unexpectedCharacter() {
    return new Error(
      getCodeFrame(input, line, column) +
        "Unexpected character " +
        JSON.stringify(ch)
    );
  }

  /** Builds the error reported when the input ends inside a construct. */
  function unexpectedEnd(what) {
    return new Error(
      getCodeFrame(input, line, column) + "Unexpected end of input: unterminated " + what
    );
  }

  while (current < input.length) {
    // ;; line comment, runs up to (not including) the next newline
    if (ch === ";" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);
      let text = "";

      while (ch !== undefined && !isNewLine(ch)) {
        text += ch;
        eatCharacter();
      }

      pushCommentToken(text, {
        type: "leading",
        startColumn: startColumn,
        endColumn: column
      });
      continue;
    }

    // (; block comment ;)
    if (ch === "(" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);
      let text = "";

      while (true) {
        if (ch === undefined) {
          throw unexpectedEnd("block comment");
        }

        if (ch === ";" && lookahead() === ")") {
          eatCharacter(2);
          break;
        }

        text += ch;
        eatCharacter();

        // The line counter is bumped when the *upcoming* character is a
        // newline; that newline is then appended on the next iteration.
        if (ch !== undefined && isNewLine(ch)) {
          line++;
          column = 0;
        }
      }

      pushCommentToken(text, {
        type: "block",
        startColumn: startColumn,
        endColumn: column
      });
      continue;
    }

    if (ch === "(") {
      pushOpenParenToken(ch);
      eatCharacter();
      continue;
    }

    if (ch === "=") {
      pushEqualToken(ch);
      eatCharacter();
      continue;
    }

    if (ch === ")") {
      pushCloseParenToken(ch);
      eatCharacter();
      continue;
    }

    if (isNewLine(ch)) {
      line++;
      eatCharacter();
      column = 0;
      continue;
    }

    if (WHITESPACE.test(ch)) {
      eatCharacter();
      continue;
    }

    // $identifier — the token value excludes the leading "$"
    if (ch === "$") {
      const startColumn = column;
      eatCharacter();
      let value = "";

      // Guard against end of input: test(undefined) would match "undefined".
      while (ch !== undefined && idchar.test(ch)) {
        value += ch;
        eatCharacter();
      }

      pushIdentifierToken(value, {
        startColumn: startColumn,
        endColumn: column
      });
      continue;
    }

    // Number literal, delegated to the number state machine
    if (
      NUMBERS.test(ch) ||
      NUMBER_KEYWORDS.test(lookahead(3, 0)) ||
      ch === "-" ||
      ch === "+"
    ) {
      const startColumn = column;
      const value = numberLiteralFSM.run(input.slice(current));

      if (value === "") {
        throw unexpectedCharacter();
      }

      pushNumberToken(value, {
        startColumn: startColumn
      });
      eatCharacter(value.length);

      // A number must be followed by a paren, whitespace or end of input.
      if (ch && !PARENS.test(ch) && !WHITESPACE.test(ch)) {
        throw unexpectedCharacter();
      }

      continue;
    }

    // "string" — must close on the same line
    if (ch === '"') {
      const startColumn = column;
      let value = "";
      eatCharacter();

      while (ch !== '"') {
        if (ch === undefined) {
          throw unexpectedEnd("string");
        }

        if (isNewLine(ch)) {
          throw unexpectedCharacter();
        }

        value += ch;
        eatCharacter();
      }

      eatCharacter(); // closing quote

      pushStringToken(value, {
        startColumn: startColumn,
        endColumn: column
      });
      continue;
    }

    // Bare word: keyword, valtype, or (possibly dotted) name
    if (LETTERS.test(ch)) {
      let value = "";
      const startColumn = column;

      while (ch !== undefined && LETTERS.test(ch)) {
        value += ch;
        eatCharacter();
      }

      // Dotted form such as `i32.add`: emit head, then dot/name pairs.
      if (ch === ".") {
        if (valtypes.indexOf(value) !== -1) {
          pushValtypeToken(value, {
            startColumn: startColumn
          });
        } else {
          pushNameToken(value, {
            startColumn: startColumn
          });
        }

        while (ch === ".") {
          const dotStartColumn = column;
          eatCharacter();

          value = "";
          const nameStartColumn = column;

          while (ch !== undefined && LETTERS.test(ch)) {
            value += ch;
            eatCharacter();
          }

          pushDotToken(".", {
            startColumn: dotStartColumn
          });
          pushNameToken(value, {
            startColumn: nameStartColumn
          });
        }

        continue;
      }

      if (typeof keywords[value] === "string") {
        pushKeywordToken(value, {
          startColumn: startColumn
        });
        continue;
      }

      if (valtypes.indexOf(value) !== -1) {
        pushValtypeToken(value, {
          startColumn: startColumn
        });
        continue;
      }

      pushNameToken(value, {
        startColumn: startColumn
      });
      continue;
    }

    throw unexpectedCharacter();
  }

  return tokens;
}
|
// Public alias for the token type table.
export const tokens = tokenTypes;
\ | No newline at end of file |