UNPKG

17.7 kBJavaScriptView Raw
1import * as monarchCommon from './common';
/**
 * Tells whether `obj` is an array in which every element satisfies `elemType`.
 *
 * @param {function(*): boolean} elemType - Predicate applied to each element.
 * @param {*} obj - Candidate value.
 * @returns {boolean} `true` only for a real array whose elements all pass the
 *   predicate (an empty array passes); `false` for falsy or non-array values.
 */
function isArrayOf(elemType, obj) {
  if (!obj || !Array.isArray(obj)) {
    return false;
  }
  for (const item of obj) {
    if (!elemType(item)) {
      return false;
    }
  }
  return true;
}
/**
 * Returns `prop` when it is a boolean, otherwise the supplied default.
 *
 * @param {*} prop - Value read from a language definition.
 * @param {*} defValue - Fallback returned for any non-boolean `prop`.
 * @returns {*} `prop` or `defValue`.
 */
function bool(prop, defValue) {
  return typeof prop === 'boolean' ? prop : defValue;
}
/**
 * Returns `prop` when it is a string, otherwise the supplied default.
 *
 * @param {*} prop - Value read from a language definition.
 * @param {*} defValue - Fallback returned for any non-string `prop`.
 * @returns {*} `prop` or `defValue`.
 */
function string(prop, defValue) {
  return typeof prop === 'string' ? prop : defValue;
}
/**
 * Builds a lookup object that maps every element of `array` to `true`.
 *
 * @param {Array} array - Elements to use as property keys.
 * @returns {Object} Plain object with one `true`-valued property per element.
 */
function arrayToHash(array) {
  const lookup = {};
  let index = 0;
  while (index < array.length) {
    lookup[array[index]] = true;
    index += 1;
  }
  return lookup;
}
/**
 * Creates a predicate that tells whether a word belongs to a fixed word list.
 *
 * Membership is kept in a `Set` rather than a plain object so that words that
 * collide with `Object.prototype` members (`hasOwnProperty`, `__proto__`,
 * `constructor`, ...) are matched like any other word and can neither crash
 * the lookup nor produce false results.
 *
 * @param {string[]} arr - Words to match; expected to be strings.
 * @param {boolean} [caseInsensitive=false] - When true, both the word list and
 *   the tested word are lower-cased before comparison.
 * @returns {function(string): boolean} Predicate returning `true` when the
 *   given word is in the list.
 */
function createKeywordMatcher(arr, caseInsensitive = false) {
  if (caseInsensitive) {
    const lowered = new Set(arr.map((entry) => entry.toLowerCase()));
    return (word) => lowered.has(word.toLowerCase());
  }
  const exact = new Set(arr);
  return (word) => exact.has(word);
}
/**
 * Compiles a regular-expression source string, expanding `@name` references
 * to attributes of `lexer`.
 *
 * Each `@name` is replaced by the attribute's string value, or by its
 * `source` when the attribute is a RegExp, wrapped in a non-capturing group.
 * Expansion is repeated while the pattern still contains `@`, for at most
 * five passes, so attributes may themselves reference other attributes.
 *
 * @param {Object} lexer - Object whose properties supply the `@name` values;
 *   its `ignoreCase` property selects the `i` flag.
 * @param {string} str - Regular-expression source, possibly with `@name`s.
 * @returns {RegExp} The compiled expression.
 * @throws When a referenced attribute is undefined, or is neither a string
 *   nor a RegExp (error built by `monarchCommon.createError`).
 */
function compileRegExp(lexer, str) {
  let pattern = str;

  const expandAttribute = (reference, attr) => {
    const value = lexer[attr];
    let expansion = '';
    if (typeof value === 'string') {
      expansion = value;
    } else if (value && value instanceof RegExp) {
      expansion = value.source;
    } else if (value === undefined) {
      throw monarchCommon.createError(lexer, 'language definition does not contain attribute \'' + attr + '\', used at: ' + pattern);
    } else {
      throw monarchCommon.createError(lexer, 'attribute reference \'' + attr + '\' must be a string, used at: ' + pattern);
    }
    return monarchCommon.empty(expansion) ? '' : '(?:' + expansion + ')';
  };

  // Bounded number of passes so a self-referencing attribute cannot loop forever.
  for (let pass = 0; pattern.indexOf('@') >= 0 && pass < 5; pass += 1) {
    pattern = pattern.replace(/@(\w+)/g, expandAttribute);
  }

  return new RegExp(pattern, lexer.ignoreCase ? 'i' : '');
}
/**
 * Picks the string a guard should be tested against.
 *
 * @param {string} id - The matched text; returned when `num` is negative.
 * @param {Array} matches - Regex match groups; `matches[num]` is returned when
 *   `num` is a valid index.
 * @param {string} state - Current state name. For `num >= 100` the value at
 *   index `num - 100` of `[state, ...state.split('.')]` is returned.
 * @param {number} num - Selector as described above.
 * @returns {?string} The selected string, or `null` when `num` selects nothing.
 */
function selectScrutinee(id, matches, state, num) {
  if (num < 0) {
    return id;
  }
  if (num < matches.length) {
    return matches[num];
  }
  if (!(num >= 100)) {
    return null;
  }
  // Index 0 is the full state name, followed by its dot-separated parts.
  const candidates = [state].concat(state.split('.'));
  const offset = num - 100;
  return offset < candidates.length ? candidates[offset] : null;
}
/**
 * Compiles one key of an action's `cases` object into a guard.
 *
 * Key syntax, as parsed below:
 *   - optional scrutinee prefix: `$n` (match group n), `$Sn` / `$sn` (selector
 *     n + 100, resolved against the state by `selectScrutinee`) or `$#`
 *     (the whole matched text); without a prefix the whole matched text is used;
 *   - followed by an optional operator `@`, `!@`, `~`, `!~`, `==`, `!=` and a
 *     pattern. An empty remainder means "scrutinee != ''", a remainder made
 *     only of word characters means `==`, anything else without an explicit
 *     operator is treated as a `~` (regex) pattern.
 *
 * @param {Object} lexer - Lexer definition; read for `ignoreCase` and for the
 *   word list named by an `@` / `!@` pattern.
 * @param {string} ruleName - Rule name, used only in error messages.
 * @param {string} tkey - The case key to compile.
 * @param {*} val - Compiled action returned as the guard's `value`.
 * @returns {{name: string, value: *, test: function(string, Array, string, boolean): boolean}}
 * @throws When an `@` / `!@` target is missing or is not an array of strings.
 */
function createGuard(lexer, ruleName, tkey, val) {
  // -1 selects the whole matched text; see selectScrutinee for other values.
  let scrut = -1;
  let oppat = tkey;
  let matches = tkey.match(/^\$(([sS]?)(\d\d?)|#)(.*)$/);
  if (matches) {
    if (matches[3]) {
      scrut = parseInt(matches[3], 10);
      if (matches[2]) {
        scrut += 100;
      }
    }
    oppat = matches[4];
  }

  let op = '~';
  let pat = oppat;
  if (!oppat || oppat.length === 0) {
    op = '!=';
    pat = '';
  } else if (/^\w*$/.test(pat)) {
    op = '==';
  } else {
    matches = oppat.match(/^(@|!@|~|!~|==|!=)(.*)$/);
    if (matches) {
      op = matches[1];
      pat = matches[2];
    }
  }

  const isRegexOp = op === '~' || op === '!~';
  let tester;
  if (isRegexOp && /^(\w|\|)*$/.test(pat)) {
    // A plain `a|b|c` alternation is cheaper as a word lookup than as a regex.
    const inWords = createKeywordMatcher(pat.split('|'), lexer.ignoreCase);
    tester = (s) => (op === '~' ? inWords(s) : !inWords(s));
  } else if (op === '@' || op === '!@') {
    const words = lexer[pat];
    if (!words) {
      throw monarchCommon.createError(lexer, 'the @ match target \'' + pat + '\' is not defined, in rule: ' + ruleName);
    }
    if (!isArrayOf((elem) => typeof elem === 'string', words)) {
      throw monarchCommon.createError(lexer, 'the @ match target \'' + pat + '\' must be an array of strings, in rule: ' + ruleName);
    }
    const inList = createKeywordMatcher(words, lexer.ignoreCase);
    tester = (s) => (op === '@' ? inList(s) : !inList(s));
  } else if (isRegexOp) {
    if (pat.indexOf('$') < 0) {
      // No substitutions: compile once up front.
      const re = compileRegExp(lexer, '^' + pat + '$');
      tester = (s) => (op === '~' ? re.test(s) : !re.test(s));
    } else {
      // The pattern depends on the current match, so it is compiled per test.
      tester = (s, id, groups, state) => {
        const re = compileRegExp(lexer, '^' + monarchCommon.substituteMatches(lexer, pat, id, groups, state) + '$');
        const matched = re.test(s);
        // Honour `!~` here as well; previously the negation was dropped.
        return op === '~' ? matched : !matched;
      };
    }
  } else {
    const expected = monarchCommon.fixCase(lexer, pat);
    if (pat.indexOf('$') < 0) {
      tester = (s) => (op === '==' ? s === expected : s !== expected);
    } else {
      tester = (s, id, groups, state) => {
        const substituted = monarchCommon.substituteMatches(lexer, expected, id, groups, state);
        return op === '==' ? s === substituted : s !== substituted;
      };
    }
  }

  if (scrut === -1) {
    return {
      name: tkey,
      value: val,
      test: (id, groups, state, eos) => tester(id, id, groups, state, eos),
    };
  }
  return {
    name: tkey,
    value: val,
    test: (id, groups, state, eos) => {
      const scrutinee = selectScrutinee(id, groups, state, scrut);
      return tester(!scrutinee ? '' : scrutinee, id, groups, state, eos);
    },
  };
}
/**
 * Normalises a rule action from a language definition into its compiled form.
 *
 * Accepted shapes and their results:
 *   - falsy            -> `{ token: '' }`
 *   - string           -> the same string
 *   - `{ token, ... }` -> a new object carrying the validated token plus the
 *                         recognised optional fields
 *   - array            -> `{ group: [...] }` with every element compiled
 *   - `{ cases }`      -> `{ test }` where `test(id, matches, state, eos)`
 *                         returns the value of the first case whose guard
 *                         holds, or `lexer.defaultToken` when none does
 *
 * Side effect: sets `lexer.usesEmbedded = true` when the action has a string
 * `nextEmbedded`.
 *
 * @param {Object} lexer - Lexer definition the action belongs to.
 * @param {string} ruleName - Rule name, used only in error messages.
 * @param {*} action - The action to compile.
 * @returns {string|Object} The compiled action.
 * @throws When the action has none of the accepted shapes or one of its
 *   attributes is invalid (errors built by `monarchCommon.createError`).
 */
function compileAction(lexer, ruleName, action) {
  if (!action) {
    return { token: '' };
  }
  if (typeof action === 'string') {
    return action;
  }

  if (action.token || action.token === '') {
    if (typeof action.token !== 'string') {
      throw monarchCommon.createError(lexer, 'a \'token\' attribute must be of type string, in rule: ' + ruleName);
    }

    const compiled = { token: action.token };
    if (action.token.indexOf('$') >= 0) {
      compiled.tokenSubst = true;
    }

    if (typeof action.bracket === 'string') {
      switch (action.bracket) {
        case '@open':
          compiled.bracket = 1;
          break;
        case '@close':
          compiled.bracket = -1;
          break;
        default:
          throw monarchCommon.createError(lexer, 'a \'bracket\' attribute must be either \'@open\' or \'@close\', in rule: ' + ruleName);
      }
    }

    if (action.next) {
      if (typeof action.next !== 'string') {
        throw monarchCommon.createError(lexer, 'the next state must be a string value in rule: ' + ruleName);
      }
      let target = action.next;
      if (!/^(@pop|@push|@popall)$/.test(target)) {
        if (target[0] === '@') {
          target = target.substr(1);
        }
        // Targets containing `$` cannot be checked until match time.
        const isStatic = target.indexOf('$') < 0;
        if (isStatic && !monarchCommon.stateExists(lexer, monarchCommon.substituteMatches(lexer, target, '', [], ''))) {
          throw monarchCommon.createError(lexer, 'the next state \'' + action.next + '\' is not defined in rule: ' + ruleName);
        }
      }
      compiled.next = target;
    }

    if (typeof action.goBack === 'number') {
      compiled.goBack = action.goBack;
    }
    // Optional attributes that are copied verbatim when given as strings.
    const stringFields = ['switchTo', 'log', '_push', '_pop', 'mark', 'fn'];
    for (const field of stringFields) {
      if (typeof action[field] === 'string') {
        compiled[field] = action[field];
      }
    }
    if (typeof action.nextEmbedded === 'string') {
      compiled.nextEmbedded = action.nextEmbedded;
      lexer.usesEmbedded = true;
    }
    return compiled;
  }

  if (Array.isArray(action)) {
    return { group: Array.from(action, (item) => compileAction(lexer, ruleName, item)) };
  }

  if (action.cases) {
    const guards = [];
    for (const tkey in action.cases) {
      if (!action.cases.hasOwnProperty(tkey)) {
        continue;
      }
      const val = compileAction(lexer, ruleName, action.cases[tkey]);
      if (tkey === '@default' || tkey === '@' || tkey === '') {
        // No test: this case always applies.
        guards.push({ test: undefined, value: val, name: tkey });
      } else if (tkey === '@eos') {
        guards.push({ test: (id, matches, state, eos) => eos, value: val, name: tkey });
      } else {
        guards.push(createGuard(lexer, ruleName, tkey, val));
      }
    }
    const fallback = lexer.defaultToken;
    return {
      test: (id, matches, state, eos) => {
        const hit = guards.find((guard) => !guard.test || guard.test(id, matches, state, eos));
        return hit ? hit.value : fallback;
      },
    };
  }

  throw monarchCommon.createError(lexer, 'an action must be a string, an object with a \'token\' or \'cases\' attribute, or an array of actions; in rule: ' + ruleName);
}
/**
 * One compiled tokenizer rule: a regular expression plus the action to take
 * when it matches.
 */
class Rule {
  /**
   * @param {string} name - Initial rule name; `setRegex` later appends the
   *   regex source to it.
   */
  constructor(name) {
    this.regex = new RegExp('');
    this.action = { token: '' };
    this.matchOnlyAtLineStart = false;
    this.name = name;
    this.stats = { time: 0, count: 0, hits: 0 };
  }

  /**
   * Sets the rule's regular expression from a source string or a RegExp
   * (only the RegExp's `source` is used; flags come from the lexer via
   * `compileRegExp`).
   *
   * A leading `^` is stripped from the source and recorded in
   * `matchOnlyAtLineStart`; the compiled expression is always anchored with
   * `^(?:...)`.
   *
   * @param {Object} lexer - Lexer definition passed to `compileRegExp`.
   * @param {string|RegExp} re - The pattern.
   * @throws When `re` is neither a string nor a RegExp.
   */
  setRegex(lexer, re) {
    let sregex;
    if (typeof re === 'string') {
      sregex = re;
    } else if (re instanceof RegExp) {
      sregex = re.source;
    } else {
      throw monarchCommon.createError(lexer, 'rules must start with a match string or regular expression: ' + this.name);
    }

    // A pattern that is just one escaped bracket character is also recorded
    // as a literal string.
    if (sregex.length === 2 && sregex[0] === '\\' && (/[\{\}\(\)\[\]]/).test(sregex[1])) {
      this.string = sregex[1];
    }
    this.matchOnlyAtLineStart = (sregex.length > 0 && sregex[0] === '^');
    this.name = this.name + ': ' + sregex;
    this.regex = compileRegExp(lexer, '^(?:' + (this.matchOnlyAtLineStart ? sregex.slice(1) : sregex) + ')');
  }

  /**
   * Compiles `act` with `compileAction` and stores the result.
   *
   * @param {Object} lexer - Lexer definition passed to `compileAction`.
   * @param {*} act - Action as written in the language definition.
   */
  setAction(lexer, act) {
    this.action = compileAction(lexer, this.name, act);
  }
}
/**
 * Compiles a Monarch language definition into a lexer object.
 *
 * The returned lexer carries `languageId`, `start`, `ignoreCase`,
 * `tokenPostfix`, `defaultToken`, `usesEmbedded`, `maxStack`, the compiled
 * `tokenizer` rules keyed by state name, and the normalised `brackets`.
 *
 * Note that `json` itself is modified: it is used as the lookup object for
 * `@attribute` references while compiling rules, so `languageId`,
 * `ignoreCase`, `noThrow`, `usesEmbedded`, `stateNames` and `defaultToken`
 * are written onto it, a missing `brackets` attribute is filled in with the
 * default bracket set, and a three-element rule whose action is an object
 * gets its `next` written onto that action object.
 *
 * @param {string} languageId - Language identifier; also the default token
 *   postfix (`'.' + languageId`).
 * @param {Object} json - The language definition.
 * @returns {Object} The compiled lexer.
 * @throws {Error} When `json` is not an object, when `tokenizer` is missing
 *   or not an object, or when a rule, include or bracket entry is malformed.
 */
export function compile(languageId, json) {
  if (!json || typeof json !== 'object') {
    throw new Error('Monarch: expecting a language definition object');
  }

  const lexer = {};
  lexer.languageId = languageId;
  lexer.noThrow = false; // errors are raised while compiling; re-enabled at the end
  lexer.maxStack = 100;
  lexer.start = (typeof json.start === 'string' ? json.start : null);
  lexer.ignoreCase = bool(json.ignoreCase, false);
  lexer.tokenPostfix = string(json.tokenPostfix, '.' + lexer.languageId);
  lexer.defaultToken = string(json.defaultToken, 'source');
  lexer.usesEmbedded = false;

  // The definition object doubles as the attribute lookup table for rules.
  const lexerMin = json;
  lexerMin.languageId = languageId;
  lexerMin.ignoreCase = lexer.ignoreCase;
  lexerMin.noThrow = lexer.noThrow;
  lexerMin.usesEmbedded = lexer.usesEmbedded;
  lexerMin.stateNames = json.tokenizer;
  lexerMin.defaultToken = lexer.defaultToken;

  // Appends the compiled form of `rules` to `newrules`, expanding includes.
  function addRules(state, newrules, rules) {
    for (let i = 0; i < rules.length; i++) {
      const rule = rules[i];
      let include = rule.include;
      if (include) {
        if (typeof include !== 'string') {
          throw monarchCommon.createError(lexer, 'an \'include\' attribute must be a string at: ' + state);
        }
        if (include[0] === '@') {
          include = include.slice(1);
        }
        if (!json.tokenizer[include]) {
          throw monarchCommon.createError(lexer, 'include target \'' + include + '\' is not defined at: ' + state);
        }
        addRules(state + '.' + include, newrules, json.tokenizer[include]);
        continue;
      }

      const newrule = new Rule(state);
      if (Array.isArray(rule) && rule.length >= 1 && rule.length <= 3) {
        // Short form: [regex, action] or [regex, action, next].
        newrule.setRegex(lexerMin, rule[0]);
        if (rule.length >= 3) {
          if (typeof rule[1] === 'string') {
            newrule.setAction(lexerMin, { token: rule[1], next: rule[2] });
          } else if (typeof rule[1] === 'object') {
            const rule1 = rule[1];
            rule1.next = rule[2];
            newrule.setAction(lexerMin, rule1);
          } else {
            throw monarchCommon.createError(lexer, 'a next state as the last element of a rule can only be given if the action is either an object or a string, at: ' + state);
          }
        } else {
          newrule.setAction(lexerMin, rule[1]);
        }
      } else {
        // Long form: { regex, action, name? }.
        if (!rule.regex) {
          throw monarchCommon.createError(lexer, 'a rule must either be an array, or an object with a \'regex\' or \'include\' field at: ' + state);
        }
        if (rule.name) {
          if (typeof rule.name === 'string') {
            newrule.name = rule.name;
          }
        }
        // NOTE(review): this tests `matchOnlyAtStart` but reads
        // `matchOnlyAtLineStart`, and setRegex below overwrites the field
        // anyway, so it currently has no effect. Kept unchanged; confirm the
        // intended behaviour before making it effective.
        if (rule.matchOnlyAtStart) {
          newrule.matchOnlyAtLineStart = bool(rule.matchOnlyAtLineStart, false);
        }
        newrule.setRegex(lexerMin, rule.regex);
        newrule.setAction(lexerMin, rule.action);
      }
      newrules.push(newrule);
    }
  }

  if (!json.tokenizer || typeof json.tokenizer !== 'object') {
    throw monarchCommon.createError(lexer, 'a language definition must define the \'tokenizer\' attribute as an object');
  }

  lexer.tokenizer = [];
  for (const key in json.tokenizer) {
    // Called through the prototype so a state named `hasOwnProperty` is safe.
    if (Object.prototype.hasOwnProperty.call(json.tokenizer, key)) {
      if (!lexer.start) {
        // The first state listed is the start state unless one was given.
        lexer.start = key;
      }
      const rules = json.tokenizer[key];
      lexer.tokenizer[key] = [];
      addRules('tokenizer.' + key, lexer.tokenizer[key], rules);
    }
  }
  // compileAction may have flagged embedded-language use on lexerMin.
  lexer.usesEmbedded = lexerMin.usesEmbedded;

  if (json.brackets) {
    if (!Array.isArray(json.brackets)) {
      throw monarchCommon.createError(lexer, 'the \'brackets\' attribute must be defined as an array');
    }
  } else {
    json.brackets = [
      { open: '{', close: '}', token: 'delimiter.curly' },
      { open: '[', close: ']', token: 'delimiter.square' },
      { open: '(', close: ')', token: 'delimiter.parenthesis' },
      { open: '<', close: '>', token: 'delimiter.angle' }
    ];
  }

  const brackets = [];
  for (const el of json.brackets) {
    let desc = el;
    if (desc && Array.isArray(desc) && desc.length === 3) {
      desc = { token: desc[2], open: desc[0], close: desc[1] };
    }
    // Validate the shape first so null or malformed entries get the
    // descriptive error instead of a TypeError or a misleading message.
    const isValid = Boolean(desc) &&
      typeof desc.open === 'string' &&
      typeof desc.token === 'string' &&
      typeof desc.close === 'string';
    if (!isValid) {
      throw monarchCommon.createError(lexer, 'every element in the \'brackets\' array must be a \'{open,close,token}\' object or array');
    }
    if (desc.open === desc.close) {
      throw monarchCommon.createError(lexer, 'open and close brackets in a \'brackets\' attribute must be different: ' + desc.open +
        '\n hint: use the \'bracket\' attribute if matching on equal brackets is required.');
    }
    brackets.push({
      token: desc.token + lexer.tokenPostfix,
      open: monarchCommon.fixCase(lexer, desc.open),
      close: monarchCommon.fixCase(lexer, desc.close)
    });
  }
  lexer.brackets = brackets;

  lexer.noThrow = true;
  return lexer;
}