// Retrieved from the UNPKG file viewer (9.86 kB, JavaScript).
1"use strict";
/**
 * Compiler-emitted interop helper: exposes property `k` of module object `m`
 * on target object `o` under the name `k2` (defaults to `k`).
 *
 * Reuses an already-installed `this.__createBinding` when one exists.
 * When `Object.create` is available the property is defined as an enumerable
 * getter, so later changes to `m[k]` remain visible through `o[k2]`;
 * otherwise the current value is simply copied.
 *
 * @param {object} o  Target object receiving the binding.
 * @param {object} m  Source module object.
 * @param {string} k  Property name on the source.
 * @param {string} [k2]  Property name on the target; defaults to `k`.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (o, m, k, k2) {
    if (k2 === undefined) {
        k2 = k;
    }
    // Getter (not a value copy) so the binding stays live.
    Object.defineProperty(o, k2, { enumerable: true, get: function () { return m[k]; } });
}) : (function (o, m, k, k2) {
    if (k2 === undefined) {
        k2 = k;
    }
    o[k2] = m[k];
}));
/**
 * Compiler-emitted interop helper: attaches `v` to `o` as its `default`
 * property.
 *
 * Reuses an already-installed `this.__setModuleDefault` when one exists.
 * When `Object.create` is available the property is defined as enumerable
 * (and, because no `writable` flag is given, read-only); otherwise it is a
 * plain assignment.
 *
 * @param {object} o  Namespace object receiving the `default` property.
 * @param {*} v  Value to expose as `default`.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function (o, v) {
    o["default"] = v;
});
/**
 * Compiler-emitted interop helper implementing `import * as ns from "..."`
 * for a module loaded through `require`.
 *
 * Reuses an already-installed `this.__importStar` when one exists.
 *
 * @param {*} mod  The value returned by `require`.
 * @returns {object} `mod` itself when it is flagged `__esModule`; otherwise a
 *   fresh namespace object that re-exposes every own property of `mod` except
 *   `default`, and carries `mod` as its `default` property.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var result = {};
    if (mod != null) {
        for (var k in mod) {
            // Skip `default` (set below) and anything inherited.
            if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) {
                __createBinding(result, mod, k);
            }
        }
    }
    __setModuleDefault(result, mod);
    return result;
};
21Object.defineProperty(exports, "__esModule", { value: true });
22exports.tokenizer = void 0;
23const util = __importStar(require("./util"));
24const types_1 = require("./types");
25const sets = __importStar(require("./sets"));
26/**
27 * Tokenizes a regular expression (that is currently a string)
28 * @param {string} regexpStr String of regular expression to be tokenized
29 *
30 * @returns {Root}
31 */
32exports.tokenizer = (regexpStr) => {
33 let i = 0, c;
34 let start = { type: types_1.types.ROOT, stack: [] };
35 // Keep track of last clause/group and stack.
36 let lastGroup = start;
37 let last = start.stack;
38 let groupStack = [];
39 const repeatErr = (col) => {
40 throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Nothing to repeat at column ${col - 1}`);
41 };
42 // Decode a few escaped characters.
43 let str = util.strToChars(regexpStr);
44 // Iterate through each character in string.
45 while (i < str.length) {
46 switch (c = str[i++]) {
47 // Handle escaped characters, inclues a few sets.
48 case '\\':
49 switch (c = str[i++]) {
50 case 'b':
51 last.push({ type: types_1.types.POSITION, value: 'b' });
52 break;
53 case 'B':
54 last.push({ type: types_1.types.POSITION, value: 'B' });
55 break;
56 case 'w':
57 last.push(sets.words());
58 break;
59 case 'W':
60 last.push(sets.notWords());
61 break;
62 case 'd':
63 last.push(sets.ints());
64 break;
65 case 'D':
66 last.push(sets.notInts());
67 break;
68 case 's':
69 last.push(sets.whitespace());
70 break;
71 case 'S':
72 last.push(sets.notWhitespace());
73 break;
74 default:
75 // Check if c is integer.
76 // In which case it's a reference.
77 if (/\d/.test(c)) {
78 last.push({ type: types_1.types.REFERENCE, value: parseInt(c, 10) });
79 // Escaped character.
80 }
81 else {
82 last.push({ type: types_1.types.CHAR, value: c.charCodeAt(0) });
83 }
84 }
85 break;
86 // Positionals.
87 case '^':
88 last.push({ type: types_1.types.POSITION, value: '^' });
89 break;
90 case '$':
91 last.push({ type: types_1.types.POSITION, value: '$' });
92 break;
93 // Handle custom sets.
94 case '[': {
95 // Check if this class is 'anti' i.e. [^abc].
96 let not;
97 if (str[i] === '^') {
98 not = true;
99 i++;
100 }
101 else {
102 not = false;
103 }
104 // Get all the characters in class.
105 let classTokens = util.tokenizeClass(str.slice(i), regexpStr);
106 // Increase index by length of class.
107 i += classTokens[1];
108 last.push({
109 type: types_1.types.SET,
110 set: classTokens[0],
111 not,
112 });
113 break;
114 }
115 // Class of any character except \n.
116 case '.':
117 last.push(sets.anyChar());
118 break;
119 // Push group onto stack.
120 case '(': {
121 // Create group.
122 let group = {
123 type: types_1.types.GROUP,
124 stack: [],
125 remember: true,
126 };
127 // If if this is a special kind of group.
128 if (str[i] === '?') {
129 c = str[i + 1];
130 i += 2;
131 // Match if followed by.
132 if (c === '=') {
133 group.followedBy = true;
134 // Match if not followed by.
135 }
136 else if (c === '!') {
137 group.notFollowedBy = true;
138 }
139 else if (c !== ':') {
140 throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid group, character '${c}'` +
141 ` after '?' at column ${i - 1}`);
142 }
143 group.remember = false;
144 }
145 // Insert subgroup into current group stack.
146 last.push(group);
147 // Remember the current group for when the group closes.
148 groupStack.push(lastGroup);
149 // Make this new group the current group.
150 lastGroup = group;
151 last = group.stack;
152 break;
153 }
154 // Pop group out of stack.
155 case ')':
156 if (groupStack.length === 0) {
157 throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unmatched ) at column ${i - 1}`);
158 }
159 lastGroup = groupStack.pop();
160 // Check if this group has a PIPE.
161 // To get back the correct last stack.
162 last = lastGroup.options ?
163 lastGroup.options[lastGroup.options.length - 1] :
164 lastGroup.stack;
165 break;
166 // Use pipe character to give more choices.
167 case '|': {
168 // Create array where options are if this is the first PIPE
169 // in this clause.
170 if (!lastGroup.options) {
171 lastGroup.options = [lastGroup.stack];
172 delete lastGroup.stack;
173 }
174 // Create a new stack and add to options for rest of clause.
175 let stack = [];
176 lastGroup.options.push(stack);
177 last = stack;
178 break;
179 }
180 // Repetition.
181 // For every repetition, remove last element from last stack
182 // then insert back a RANGE object.
183 // This design is chosen because there could be more than
184 // one repetition symbols in a regex i.e. `a?+{2,3}`.
185 case '{': {
186 let rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
187 if (rs !== null) {
188 if (last.length === 0) {
189 repeatErr(i);
190 }
191 min = parseInt(rs[1], 10);
192 max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
193 i += rs[0].length;
194 last.push({
195 type: types_1.types.REPETITION,
196 min,
197 max,
198 value: last.pop(),
199 });
200 }
201 else {
202 last.push({
203 type: types_1.types.CHAR,
204 value: 123,
205 });
206 }
207 break;
208 }
209 case '?':
210 if (last.length === 0) {
211 repeatErr(i);
212 }
213 last.push({
214 type: types_1.types.REPETITION,
215 min: 0,
216 max: 1,
217 value: last.pop(),
218 });
219 break;
220 case '+':
221 if (last.length === 0) {
222 repeatErr(i);
223 }
224 last.push({
225 type: types_1.types.REPETITION,
226 min: 1,
227 max: Infinity,
228 value: last.pop(),
229 });
230 break;
231 case '*':
232 if (last.length === 0) {
233 repeatErr(i);
234 }
235 last.push({
236 type: types_1.types.REPETITION,
237 min: 0,
238 max: Infinity,
239 value: last.pop(),
240 });
241 break;
242 // Default is a character that is not `\[](){}?+*^$`.
243 default:
244 last.push({
245 type: types_1.types.CHAR,
246 value: c.charCodeAt(0),
247 });
248 }
249 }
250 // Check if any groups have not been closed.
251 if (groupStack.length !== 0) {
252 throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated group`);
253 }
254 return start;
255};
//# sourceMappingURL=tokenizer.js.map