1 | var Token = require("./Token");
|
2 | var StringSource = require("./StringSource");
|
3 |
|
// Public entry point: callers construct a tokeniser from an ordered list of
// {name, regex} rules via exports.RegexTokeniser(rules).
exports.RegexTokeniser = RegexTokeniser;
|
5 |
|
function RegexTokeniser(rules) {
    // Recompile each rule's regex with the "g" flag, which readNextToken
    // relies on for lastIndex-based anchored matching. Unlike a bare
    // `new RegExp(source, "g")`, this preserves flags the caller set on the
    // original regex (e.g. /foo/i stays case-insensitive) instead of
    // silently discarding them.
    rules = rules.map(function(rule) {
        var flags = "g";
        if (rule.regex.ignoreCase) {
            flags += "i";
        }
        if (rule.regex.multiline) {
            flags += "m";
        }
        return {
            name: rule.name,
            regex: new RegExp(rule.regex.source, flags)
        };
    });

    // Tokenise `input` into an array of Tokens. Every character is consumed:
    // characters matched by no rule become single-character
    // "unrecognisedCharacter" tokens, and a zero-width "end" token
    // (value null) is always appended last. `description` is forwarded to
    // StringSource for use in the source ranges attached to each token.
    function tokenise(input, description) {
        var source = new StringSource(input, description);
        var index = 0;
        var tokens = [];

        while (index < input.length) {
            var nextToken = readNextToken(input, index, source);
            index += nextToken.value.length;
            tokens.push(nextToken);
        }

        tokens.push(endToken(input, source));
        return tokens;
    }

    // Return the token starting exactly at startIndex. Rules are tried in
    // order; the first rule whose regex matches a non-empty string anchored
    // at startIndex wins (the result.index check rejects matches that start
    // later, and the length check rejects zero-width matches so tokenise
    // always makes progress).
    function readNextToken(string, startIndex, source) {
        for (var i = 0; i < rules.length; i++) {
            var regex = rules[i].regex;
            regex.lastIndex = startIndex;
            var result = regex.exec(string);
            if (result && result.index === startIndex && result[0].length) {
                var value = result[0];
                return new Token(
                    rules[i].name,
                    value,
                    source.range(startIndex, startIndex + value.length)
                );
            }
        }
        // No rule matched at this position: emit a one-character fallback
        // token so the caller still advances through the input.
        return new Token(
            "unrecognisedCharacter",
            string.substring(startIndex, startIndex + 1),
            source.range(startIndex, startIndex + 1)
        );
    }

    // Zero-width sentinel token marking the end of the input.
    function endToken(input, source) {
        return new Token(
            "end",
            null,
            source.range(input.length, input.length)
        );
    }

    return {
        tokenise: tokenise
    };
}
|
62 |
|
63 |
|