UNPKG

ret/dist/tokenizer.js

Version:

9.86 kB · JavaScript · View Raw

// Strict-mode directive; it only takes effect while it stays ahead of every other statement.
"use strict";
/**
 * TypeScript-emitted interop helper: exposes property `k` of `m` on `o`
 * under the name `k2` (which defaults to `k`).
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists, an enumerable getter is defined so later changes to
 * `m[k]` stay visible through `o[k2]`; without it the current value is copied.
 *
 * @param {Object} o Object receiving the binding.
 * @param {Object} m Object the value is read from.
 * @param {string} k Property name on `m`.
 * @param {string} [k2] Property name on `o`; defaults to `k`.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  o[k2] = m[k];
}));
/**
 * TypeScript-emitted interop helper: stores `v` as the `default` property of `o`.
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists, the property is defined as enumerable via
 * `Object.defineProperty`; without it a plain assignment is used.
 *
 * @param {Object} o Object receiving the `default` property.
 * @param {*} v Value to expose as `default`.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
  Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
  o["default"] = v;
});
/**
 * TypeScript-emitted interop helper behind `import * as x from "..."`.
 *
 * A module flagged with `__esModule` is returned untouched. Anything else is
 * wrapped in a fresh object: each own property except "default" is bound with
 * `__createBinding`, and the original value is attached with `__setModuleDefault`.
 *
 * @param {*} mod Value returned by `require`.
 * @returns {Object} `mod` itself or the wrapper object built around it.
 */
var __importStar = (this && this.__importStar) || function (mod) {
  if (mod && mod.__esModule) return mod;
  var result = {};
  if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
  __setModuleDefault(result, mod);
  return result;
};
// Flag the CommonJS exports object as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export up front; the tokenizer function is assigned below.
exports.tokenizer = void 0;
// Sibling modules used by the tokenizer. `util` and `sets` are namespace
// imports, so they are routed through __importStar.
const util = __importStar(require("./util"));
const types_1 = require("./types");
const sets = __importStar(require("./sets"));
26/**
* Tokenizes a regular expression (that is currently a string)
* @param {string} regexpStr String of regular expression to be tokenized
*
* @returns {Root}
*/
32exports.tokenizer = (regexpStr) => {
  let i = 0, c;
  let start = { type: types_1.types.ROOT, stack: [] };
  // Keep track of last clause/group and stack.
  let lastGroup = start;
  let last = start.stack;
  let groupStack = [];
  const repeatErr = (col) => {
      throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Nothing to repeat at column ${col - 1}`);
  };
  // Decode a few escaped characters.
  let str = util.strToChars(regexpStr);
  // Iterate through each character in string.
  while (i < str.length) {
      switch (c = str[i++]) {
          // Handle escaped characters, inclues a few sets.
          case '\\':
              switch (c = str[i++]) {
                  case 'b':
                      last.push({ type: types_1.types.POSITION, value: 'b' });
                      break;
                  case 'B':
                      last.push({ type: types_1.types.POSITION, value: 'B' });
                      break;
                  case 'w':
                      last.push(sets.words());
                      break;
                  case 'W':
                      last.push(sets.notWords());
                      break;
                  case 'd':
                      last.push(sets.ints());
                      break;
                  case 'D':
                      last.push(sets.notInts());
                      break;
                  case 's':
                      last.push(sets.whitespace());
                      break;
                  case 'S':
                      last.push(sets.notWhitespace());
                      break;
                  default:
                      // Check if c is integer.
                      // In which case it's a reference.
                      if (/\d/.test(c)) {
                          last.push({ type: types_1.types.REFERENCE, value: parseInt(c, 10) });
                          // Escaped character.
                      }
                      else {
                          last.push({ type: types_1.types.CHAR, value: c.charCodeAt(0) });
                      }
              }
              break;
          // Positionals.
          case '^':
              last.push({ type: types_1.types.POSITION, value: '^' });
              break;
          case '$':
              last.push({ type: types_1.types.POSITION, value: '$' });
              break;
          // Handle custom sets.
          case '[': {
              // Check if this class is 'anti' i.e. [^abc].
              let not;
              if (str[i] === '^') {
                  not = true;
                  i++;
              }
              else {
                  not = false;
              }
              // Get all the characters in class.
              let classTokens = util.tokenizeClass(str.slice(i), regexpStr);
              // Increase index by length of class.
              i += classTokens[1];
              last.push({
                  type: types_1.types.SET,
                  set: classTokens[0],
                  not,
              });
              break;
          }
          // Class of any character except \n.
          case '.':
              last.push(sets.anyChar());
              break;
          // Push group onto stack.
          case '(': {
              // Create group.
              let group = {
                  type: types_1.types.GROUP,
                  stack: [],
                  remember: true,
              };
              // If if this is a special kind of group.
              if (str[i] === '?') {
                  c = str[i + 1];
                  i += 2;
                  // Match if followed by.
                  if (c === '=') {
                      group.followedBy = true;
                      // Match if not followed by.
                  }
                  else if (c === '!') {
                      group.notFollowedBy = true;
                  }
                  else if (c !== ':') {
                      throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid group, character '${c}'` +
                          ` after '?' at column ${i - 1}`);
                  }
                  group.remember = false;
              }
              // Insert subgroup into current group stack.
              last.push(group);
              // Remember the current group for when the group closes.
              groupStack.push(lastGroup);
              // Make this new group the current group.
              lastGroup = group;
              last = group.stack;
              break;
          }
          // Pop group out of stack.
          case ')':
              if (groupStack.length === 0) {
                  throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unmatched ) at column ${i - 1}`);
              }
              lastGroup = groupStack.pop();
              // Check if this group has a PIPE.
              // To get back the correct last stack.
              last = lastGroup.options ?
                  lastGroup.options[lastGroup.options.length - 1] :
                  lastGroup.stack;
              break;
          // Use pipe character to give more choices.
          case '|': {
              // Create array where options are if this is the first PIPE
              // in this clause.
              if (!lastGroup.options) {
                  lastGroup.options = [lastGroup.stack];
                  delete lastGroup.stack;
              }
              // Create a new stack and add to options for rest of clause.
              let stack = [];
              lastGroup.options.push(stack);
              last = stack;
              break;
          }
          // Repetition.
          // For every repetition, remove last element from last stack
          // then insert back a RANGE object.
          // This design is chosen because there could be more than
          // one repetition symbols in a regex i.e. `a?+{2,3}`.
          case '{': {
              let rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
              if (rs !== null) {
                  if (last.length === 0) {
                      repeatErr(i);
                  }
                  min = parseInt(rs[1], 10);
                  max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
                  i += rs[0].length;
                  last.push({
                      type: types_1.types.REPETITION,
                      min,
                      max,
                      value: last.pop(),
                  });
              }
              else {
                  last.push({
                      type: types_1.types.CHAR,
                      value: 123,
                  });
              }
              break;
          }
          case '?':
              if (last.length === 0) {
                  repeatErr(i);
              }
              last.push({
                  type: types_1.types.REPETITION,
                  min: 0,
                  max: 1,
                  value: last.pop(),
              });
              break;
          case '+':
              if (last.length === 0) {
                  repeatErr(i);
              }
              last.push({
                  type: types_1.types.REPETITION,
                  min: 1,
                  max: Infinity,
                  value: last.pop(),
              });
              break;
          case '*':
              if (last.length === 0) {
                  repeatErr(i);
              }
              last.push({
                  type: types_1.types.REPETITION,
                  min: 0,
                  max: Infinity,
                  value: last.pop(),
              });
              break;
          // Default is a character that is not `\[](){}?+*^$`.
          default:
              last.push({
                  type: types_1.types.CHAR,
                  value: c.charCodeAt(0),
              });
      }
  }
  // Check if any groups have not been closed.
  if (groupStack.length !== 0) {
      throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated group`);
  }
  return start;
255};
//# sourceMappingURL=tokenizer.js.map
\No newline at end of file

1	`"use strict";`
2	`var __createBinding = (this && this.__createBinding) \|\| (Object.create ? (function(o, m, k, k2) {`
3	`if (k2 === undefined) k2 = k;`
4	`Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });`
5	`}) : (function(o, m, k, k2) {`
6	`if (k2 === undefined) k2 = k;`
7	`o[k2] = m[k];`
8	`}));`
9	`var __setModuleDefault = (this && this.__setModuleDefault) \|\| (Object.create ? (function(o, v) {`
10	`Object.defineProperty(o, "default", { enumerable: true, value: v });`
11	`}) : function(o, v) {`
12	`o["default"] = v;`
13	`});`
14	`var __importStar = (this && this.__importStar) \|\| function (mod) {`
15	`if (mod && mod.__esModule) return mod;`
16	`var result = {};`
17	`if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);`
18	`__setModuleDefault(result, mod);`
19	`return result;`
20	`};`
21	`Object.defineProperty(exports, "__esModule", { value: true });`
22	`exports.tokenizer = void 0;`
23	`const util = __importStar(require("./util"));`
24	`const types_1 = require("./types");`
25	`const sets = __importStar(require("./sets"));`
26	`/**`
27	`* Tokenizes a regular expression (that is currently a string)`
28	`* @param {string} regexpStr String of regular expression to be tokenized`
29	`*`
30	`* @returns {Root}`
31	`*/`
32	`exports.tokenizer = (regexpStr) => {`
33	`let i = 0, c;`
34	`let start = { type: types_1.types.ROOT, stack: [] };`
35	`// Keep track of last clause/group and stack.`
36	`let lastGroup = start;`
37	`let last = start.stack;`
38	`let groupStack = [];`
39	`const repeatErr = (col) => {`
40	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Nothing to repeat at column ${col - 1}`);
41	`};`
42	`// Decode a few escaped characters.`
43	`let str = util.strToChars(regexpStr);`
44	`// Iterate through each character in string.`
45	`while (i < str.length) {`
46	`switch (c = str[i++]) {`
47	`// Handle escaped characters, includes a few sets.`
48	`case '\\':`
49	`switch (c = str[i++]) {`
50	`case 'b':`
51	`last.push({ type: types_1.types.POSITION, value: 'b' });`
52	`break;`
53	`case 'B':`
54	`last.push({ type: types_1.types.POSITION, value: 'B' });`
55	`break;`
56	`case 'w':`
57	`last.push(sets.words());`
58	`break;`
59	`case 'W':`
60	`last.push(sets.notWords());`
61	`break;`
62	`case 'd':`
63	`last.push(sets.ints());`
64	`break;`
65	`case 'D':`
66	`last.push(sets.notInts());`
67	`break;`
68	`case 's':`
69	`last.push(sets.whitespace());`
70	`break;`
71	`case 'S':`
72	`last.push(sets.notWhitespace());`
73	`break;`
74	`default:`
75	`// Check if c is integer.`
76	`// In which case it's a reference.`
77	`if (/\d/.test(c)) {`
78	`last.push({ type: types_1.types.REFERENCE, value: parseInt(c, 10) });`
79	`// Escaped character.`
80	`}`
81	`else {`
82	`last.push({ type: types_1.types.CHAR, value: c.charCodeAt(0) });`
83	`}`
84	`}`
85	`break;`
86	`// Positionals.`
87	`case '^':`
88	`last.push({ type: types_1.types.POSITION, value: '^' });`
89	`break;`
90	`case '$':`
91	`last.push({ type: types_1.types.POSITION, value: '$' });`
92	`break;`
93	`// Handle custom sets.`
94	`case '[': {`
95	`// Check if this class is 'anti' i.e. [^abc].`
96	`let not;`
97	`if (str[i] === '^') {`
98	`not = true;`
99	`i++;`
100	`}`
101	`else {`
102	`not = false;`
103	`}`
104	`// Get all the characters in class.`
105	`let classTokens = util.tokenizeClass(str.slice(i), regexpStr);`
106	`// Increase index by length of class.`
107	`i += classTokens[1];`
108	`last.push({`
109	`type: types_1.types.SET,`
110	`set: classTokens[0],`
111	`not,`
112	`});`
113	`break;`
114	`}`
115	`// Class of any character except \n.`
116	`case '.':`
117	`last.push(sets.anyChar());`
118	`break;`
119	`// Push group onto stack.`
120	`case '(': {`
121	`// Create group.`
122	`let group = {`
123	`type: types_1.types.GROUP,`
124	`stack: [],`
125	`remember: true,`
126	`};`
127	`// Check if this is a special kind of group.`
128	`if (str[i] === '?') {`
129	`c = str[i + 1];`
130	`i += 2;`
131	`// Match if followed by.`
132	`if (c === '=') {`
133	`group.followedBy = true;`
134	`// Match if not followed by.`
135	`}`
136	`else if (c === '!') {`
137	`group.notFollowedBy = true;`
138	`}`
139	`else if (c !== ':') {`
140	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid group, character '${c}'` +
141	` after '?' at column ${i - 1}`);
142	`}`
143	`group.remember = false;`
144	`}`
145	`// Insert subgroup into current group stack.`
146	`last.push(group);`
147	`// Remember the current group for when the group closes.`
148	`groupStack.push(lastGroup);`
149	`// Make this new group the current group.`
150	`lastGroup = group;`
151	`last = group.stack;`
152	`break;`
153	`}`
154	`// Pop group out of stack.`
155	`case ')':`
156	`if (groupStack.length === 0) {`
157	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unmatched ) at column ${i - 1}`);
158	`}`
159	`lastGroup = groupStack.pop();`
160	`// Check if this group has a PIPE.`
161	`// To get back the correct last stack.`
162	`last = lastGroup.options ?`
163	`lastGroup.options[lastGroup.options.length - 1] :`
164	`lastGroup.stack;`
165	`break;`
166	`// Use pipe character to give more choices.`
167	`case '\|': {`
168	`// Create array where options are if this is the first PIPE`
169	`// in this clause.`
170	`if (!lastGroup.options) {`
171	`lastGroup.options = [lastGroup.stack];`
172	`delete lastGroup.stack;`
173	`}`
174	`// Create a new stack and add to options for rest of clause.`
175	`let stack = [];`
176	`lastGroup.options.push(stack);`
177	`last = stack;`
178	`break;`
179	`}`
180	`// Repetition.`
181	`// For every repetition, remove last element from last stack`
182	`// then insert back a REPETITION object.`
183	`// This design is chosen because there could be more than`
184	// one repetition symbols in a regex i.e. `a?+{2,3}`.
185	`case '{': {`
186	`let rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;`
187	`if (rs !== null) {`
188	`if (last.length === 0) {`
189	`repeatErr(i);`
190	`}`
191	`min = parseInt(rs[1], 10);`
192	`max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;`
193	`i += rs[0].length;`
194	`last.push({`
195	`type: types_1.types.REPETITION,`
196	`min,`
197	`max,`
198	`value: last.pop(),`
199	`});`
200	`}`
201	`else {`
202	`last.push({`
203	`type: types_1.types.CHAR,`
204	`value: 123,`
205	`});`
206	`}`
207	`break;`
208	`}`
209	`case '?':`
210	`if (last.length === 0) {`
211	`repeatErr(i);`
212	`}`
213	`last.push({`
214	`type: types_1.types.REPETITION,`
215	`min: 0,`
216	`max: 1,`
217	`value: last.pop(),`
218	`});`
219	`break;`
220	`case '+':`
221	`if (last.length === 0) {`
222	`repeatErr(i);`
223	`}`
224	`last.push({`
225	`type: types_1.types.REPETITION,`
226	`min: 1,`
227	`max: Infinity,`
228	`value: last.pop(),`
229	`});`
230	`break;`
231	`case '*':`
232	`if (last.length === 0) {`
233	`repeatErr(i);`
234	`}`
235	`last.push({`
236	`type: types_1.types.REPETITION,`
237	`min: 0,`
238	`max: Infinity,`
239	`value: last.pop(),`
240	`});`
241	`break;`
242	// Default is a character that is not `\[](){}?+*^$`.
243	`default:`
244	`last.push({`
245	`type: types_1.types.CHAR,`
246	`value: c.charCodeAt(0),`
247	`});`
248	`}`
249	`}`
250	`// Check if any groups have not been closed.`
251	`if (groupStack.length !== 0) {`
252	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated group`);
253	`}`
254	`return start;`
255	`};`
256	`//# sourceMappingURL=tokenizer.js.map`
\	No newline at end of file