1 | import * as monarchCommon from './common';
|
/**
 * Tells whether `obj` is an array whose every element passes `elemType`.
 *
 * @param {function(*): *} elemType Predicate called with each element; a
 *     falsy result rejects the whole array.
 * @param {*} obj Candidate value.
 * @returns {boolean} `false` for non-arrays (including null/undefined) or when
 *     any element is rejected; `true` otherwise (an empty array passes).
 */
function isArrayOf(elemType, obj) {
    // Array.isArray already rejects null, undefined and every other non-array.
    if (!Array.isArray(obj)) {
        return false;
    }
    for (const item of obj) {
        if (!elemType(item)) {
            return false;
        }
    }
    return true;
}
|
/**
 * Returns `prop` when it is a genuine boolean, otherwise the fallback.
 *
 * @param {*} prop Value to inspect.
 * @param {*} defValue Fallback returned for any non-boolean `prop`.
 * @returns {*} `prop` or `defValue`.
 */
function bool(prop, defValue) {
    return typeof prop === 'boolean' ? prop : defValue;
}
|
/**
 * Returns `prop` when it is a string (including the empty string), otherwise
 * the fallback.
 *
 * @param {*} prop Value to inspect.
 * @param {*} defValue Fallback returned for any non-string `prop`.
 * @returns {*} `prop` or `defValue`.
 */
function string(prop, defValue) {
    return typeof prop === 'string' ? prop : defValue;
}
|
/**
 * Converts a list of keys into a plain object mapping each key to `true`.
 *
 * @param {Array} array Keys to record; each element is used as a property name.
 * @returns {Object} Plain object with one `true`-valued property per element.
 */
function arrayToHash(array) {
    const lookup = {};
    for (const key of array) {
        lookup[key] = true;
    }
    return lookup;
}
|
/**
 * Creates a predicate that tells whether a word belongs to a fixed word list.
 *
 * The words are kept in a `Set` rather than as properties of a plain object.
 * With a plain object, a word list containing `hasOwnProperty` shadows the
 * method used for the lookup (every call then throws a TypeError), and a word
 * such as `__proto__` can never be stored as an own property, so it would never
 * match. A `Set` has neither problem.
 *
 * @param {string[]} arr Words to recognise.
 * @param {boolean} [caseInsensitive=false] When truthy, both the word list and
 *     the tested word are lower-cased before comparison.
 * @returns {function(string): boolean} Predicate returning `true` only for
 *     words present in `arr`.
 */
function createKeywordMatcher(arr, caseInsensitive) {
    const foldCase = Boolean(caseInsensitive);
    // String(...) mirrors the property-key coercion of the previous
    // object-based lookup for the case-sensitive path.
    const words = new Set(arr.map(function (entry) {
        return foldCase ? entry.toLowerCase() : String(entry);
    }));
    if (foldCase) {
        return function (word) {
            return words.has(word.toLowerCase());
        };
    }
    return function (word) {
        return words.has(word);
    };
}
|
/**
 * Compiles a regular-expression source string, first expanding `@name`
 * references against attributes of `lexer`.
 *
 * Each `@name` is replaced by the referenced attribute (a string, or the
 * `source` of a RegExp) wrapped in a non-capturing group; an empty replacement
 * expands to nothing. Expansion is repeated while the string still contains
 * `@`, for at most five passes, so references may be nested.
 *
 * @param {Object} lexer Object holding the referenced attributes and the
 *     `ignoreCase` flag.
 * @param {string} str Regular-expression source, possibly with `@name` parts.
 * @returns {RegExp} Compiled expression, with the `i` flag when
 *     `lexer.ignoreCase` is truthy.
 * @throws Error from `monarchCommon.createError` when a referenced attribute
 *     is missing or is neither a string nor a RegExp.
 */
function compileRegExp(lexer, str) {
    const expandReference = function (whole, attr) {
        const value = lexer[attr];
        let expansion;
        if (typeof value === 'string') {
            expansion = value;
        }
        else if (value instanceof RegExp) {
            expansion = value.source;
        }
        else {
            // `str` is the pattern as it stands at the start of the current pass.
            const problem = (value === undefined)
                ? 'language definition does not contain attribute \'' + attr + '\', used at: ' + str
                : 'attribute reference \'' + attr + '\' must be a string, used at: ' + str;
            throw monarchCommon.createError(lexer, problem);
        }
        return monarchCommon.empty(expansion) ? '' : '(?:' + expansion + ')';
    };
    for (let pass = 0; pass < 5 && str.indexOf('@') >= 0; pass++) {
        str = str.replace(/@(\w+)/g, expandReference);
    }
    const flags = lexer.ignoreCase ? 'i' : '';
    return new RegExp(str, flags);
}
|
/**
 * Picks the value a guard should be tested against.
 *
 * @param {string} id Returned when `num` is negative.
 * @param {Array} matches Match groups; `matches[num]` is returned when `num`
 *     is a valid index.
 * @param {string} state Dotted state name. For `num >= 100`, `num - 100`
 *     indexes the list `[state, ...state.split('.')]`.
 * @param {number} num Selector as described above.
 * @returns {*} The selected value, or `null` when `num` selects nothing.
 */
function selectScrutinee(id, matches, state, num) {
    if (num < 0) {
        return id;
    }
    if (num < matches.length) {
        return matches[num];
    }
    if (!(num >= 100)) {
        return null;
    }
    // Index 0 is the full state name, followed by its dot-separated parts.
    const stateParts = [state].concat(state.split('.'));
    const partIndex = num - 100;
    return partIndex < stateParts.length ? stateParts[partIndex] : null;
}
|
/**
 * Compiles one key of a `cases` object into a guard `{ name, value, test }`.
 *
 * Key syntax handled here:
 *   - optional prefix `$n`, `$Sn`/`$sn` or `$#` selecting what is tested
 *     (no prefix or `$#`: the matched text; `$n`: entry `n` of the match
 *     groups; `$Sn`: index `n` offset by 100, which `selectScrutinee` resolves
 *     against the state name);
 *   - followed by an optional operator `@`, `!@`, `~`, `!~`, `==`, `!=` and a
 *     pattern. A pattern made only of word characters implies `==`; an empty
 *     pattern means "not equal to the empty string"; anything else without an
 *     explicit operator is treated as `~`.
 *
 * @param {Object} lexer Lexer definition (read for `ignoreCase` and for
 *     `@name` word lists).
 * @param {string} ruleName Rule name, used only in error messages.
 * @param {string} tkey The case key to compile.
 * @param {*} val Value stored on the guard as `value`.
 * @returns {{name: string, value: *, test: function}} Guard whose
 *     `test(id, matches, state, eos)` returns whether the case applies.
 * @throws Error from `monarchCommon.createError` when an `@name` target is
 *     missing or is not an array of strings.
 */
function createGuard(lexer, ruleName, tkey, val) {
    // -1 means "test the matched text itself".
    let scrut = -1;
    let oppat = tkey;
    let matches = tkey.match(/^\$(([sS]?)(\d\d?)|#)(.*)$/);
    if (matches) {
        if (matches[3]) {
            scrut = parseInt(matches[3], 10);
            if (matches[2]) {
                // State references live in the 100+ range of scrutinee indices.
                scrut = scrut + 100;
            }
        }
        oppat = matches[4];
    }

    // Split the remainder into operator and pattern.
    let op = '~';
    let pat = oppat;
    if (!oppat || oppat.length === 0) {
        op = '!=';
        pat = '';
    }
    else if (/^\w*$/.test(pat)) {
        op = '==';
    }
    else {
        matches = oppat.match(/^(@|!@|~|!~|==|!=)(.*)$/);
        if (matches) {
            op = matches[1];
            pat = matches[2];
        }
    }

    // Build the tester: (scrutinee, id, matches, state, eos) => boolean.
    let tester;
    if ((op === '~' || op === '!~') && /^(\w|\|)*$/.test(pat)) {
        // A plain alternation of words needs no regular expression.
        const inAlternatives = createKeywordMatcher(pat.split('|'), lexer.ignoreCase);
        tester = function (s) { return (op === '~' ? inAlternatives(s) : !inAlternatives(s)); };
    }
    else if (op === '@' || op === '!@') {
        const words = lexer[pat];
        if (!words) {
            throw monarchCommon.createError(lexer, 'the @ match target \'' + pat + '\' is not defined, in rule: ' + ruleName);
        }
        if (!(isArrayOf(function (elem) { return (typeof (elem) === 'string'); }, words))) {
            throw monarchCommon.createError(lexer, 'the @ match target \'' + pat + '\' must be an array of strings, in rule: ' + ruleName);
        }
        const inWordList = createKeywordMatcher(words, lexer.ignoreCase);
        tester = function (s) { return (op === '@' ? inWordList(s) : !inWordList(s)); };
    }
    else if (op === '~' || op === '!~') {
        if (pat.indexOf('$') < 0) {
            // No substitutions: compile the expression once.
            const fixedRe = compileRegExp(lexer, '^' + pat + '$');
            tester = function (s) { return (op === '~' ? fixedRe.test(s) : !fixedRe.test(s)); };
        }
        else {
            // The pattern refers to match groups/state, so it is compiled per test.
            tester = function (s, id, groups, state) {
                const dynamicRe = compileRegExp(lexer, '^' + monarchCommon.substituteMatches(lexer, pat, id, groups, state) + '$');
                const matched = dynamicRe.test(s);
                // Honour `!~` here as well, consistently with the precompiled branch.
                return (op === '~' ? matched : !matched);
            };
        }
    }
    else {
        // '==' or '!=': exact string comparison.
        const patx = monarchCommon.fixCase(lexer, pat);
        if (pat.indexOf('$') < 0) {
            tester = function (s) { return (op === '==' ? s === patx : s !== patx); };
        }
        else {
            tester = function (s, id, groups, state) {
                const patexp = monarchCommon.substituteMatches(lexer, patx, id, groups, state);
                return (op === '==' ? s === patexp : s !== patexp);
            };
        }
    }

    if (scrut === -1) {
        return {
            name: tkey, value: val, test: function (id, groups, state, eos) {
                return tester(id, id, groups, state, eos);
            }
        };
    }
    return {
        name: tkey, value: val, test: function (id, groups, state, eos) {
            const scrutinee = selectScrutinee(id, groups, state, scrut);
            // A missing scrutinee is tested as the empty string.
            return tester(!scrutinee ? '' : scrutinee, id, groups, state, eos);
        }
    };
}
|
/**
 * Normalises a rule action into its compiled form.
 *
 * Accepted shapes and their results:
 *   - falsy value            -> `{ token: '' }`
 *   - string                 -> returned unchanged
 *   - object with `token`    -> new object carrying the validated attributes
 *   - array                  -> `{ group: [...] }` with each element compiled
 *   - object with `cases`    -> `{ test }` where `test(id, matches, state, eos)`
 *                               returns the value of the first matching case,
 *                               or `lexer.defaultToken` when none matches
 *
 * Side effect: sets `lexer.usesEmbedded = true` when a token action has a
 * string `nextEmbedded`.
 *
 * @param {Object} lexer Lexer definition.
 * @param {string} ruleName Rule name, used only in error messages.
 * @param {*} action Raw action.
 * @returns {*} Compiled action as described above.
 * @throws Error from `monarchCommon.createError` on a malformed action.
 */
function compileAction(lexer, ruleName, action) {
    if (!action) {
        return { token: '' };
    }
    if (typeof action === 'string') {
        return action;
    }

    if (action.token || action.token === '') {
        if (typeof action.token !== 'string') {
            throw monarchCommon.createError(lexer, 'a \'token\' attribute must be of type string, in rule: ' + ruleName);
        }
        const compiled = { token: action.token };
        if (action.token.indexOf('$') >= 0) {
            // The token name contains substitutions to resolve at match time.
            compiled.tokenSubst = true;
        }
        if (typeof action.bracket === 'string') {
            switch (action.bracket) {
                case '@open':
                    compiled.bracket = 1;
                    break;
                case '@close':
                    compiled.bracket = -1;
                    break;
                default:
                    throw monarchCommon.createError(lexer, 'a \'bracket\' attribute must be either \'@open\' or \'@close\', in rule: ' + ruleName);
            }
        }
        if (action.next) {
            if (typeof action.next !== 'string') {
                throw monarchCommon.createError(lexer, 'the next state must be a string value in rule: ' + ruleName);
            }
            let target = action.next;
            if (!/^(@pop|@push|@popall)$/.test(target)) {
                if (target[0] === '@') {
                    target = target.substr(1);
                }
                // Only names without substitutions can be checked at compile time.
                const isStatic = target.indexOf('$') < 0;
                if (isStatic && !monarchCommon.stateExists(lexer, monarchCommon.substituteMatches(lexer, target, '', [], ''))) {
                    throw monarchCommon.createError(lexer, 'the next state \'' + action.next + '\' is not defined in rule: ' + ruleName);
                }
            }
            compiled.next = target;
        }
        if (typeof action.goBack === 'number') {
            compiled.goBack = action.goBack;
        }
        // Optional attributes that are copied verbatim when they are strings.
        const stringFields = ['switchTo', 'log', '_push', '_pop', 'mark', 'fn'];
        for (const field of stringFields) {
            if (typeof action[field] === 'string') {
                compiled[field] = action[field];
            }
        }
        if (typeof action.nextEmbedded === 'string') {
            compiled.nextEmbedded = action.nextEmbedded;
            lexer.usesEmbedded = true;
        }
        return compiled;
    }

    if (Array.isArray(action)) {
        const group = Array.from(action, function (member) {
            return compileAction(lexer, ruleName, member);
        });
        return { group: group };
    }

    if (action.cases) {
        const branches = [];
        for (const tkey of Object.keys(action.cases)) {
            const val = compileAction(lexer, ruleName, action.cases[tkey]);
            if (tkey === '@default' || tkey === '@' || tkey === '') {
                // No test: this branch always applies.
                branches.push({ test: undefined, value: val, name: tkey });
            }
            else if (tkey === '@eos') {
                branches.push({ test: function (id, matches, state, eos) { return eos; }, value: val, name: tkey });
            }
            else {
                branches.push(createGuard(lexer, ruleName, tkey, val));
            }
        }
        const fallback = lexer.defaultToken;
        return {
            test: function (id, matches, state, eos) {
                const hit = branches.find(function (branch) {
                    return !branch.test || branch.test(id, matches, state, eos);
                });
                return hit ? hit.value : fallback;
            }
        };
    }

    throw monarchCommon.createError(lexer, 'an action must be a string, an object with a \'token\' or \'cases\' attribute, or an array of actions; in rule: ' + ruleName);
}
|
/**
 * A single compiled lexer rule: a regular expression plus the action to run
 * when it matches.
 *
 * Fields set by the constructor: `regex`, `action`, `matchOnlyAtLineStart`,
 * `name` and `stats` (`time`/`count`/`hits` counters, all starting at 0).
 */
var Rule = (function () {
    /**
     * @param {string} name Initial rule name.
     */
    function Rule(name) {
        this.regex = new RegExp('');
        this.action = { token: '' };
        this.matchOnlyAtLineStart = false;
        this.name = name;
        this.stats = { time: 0, count: 0, hits: 0 };
    }

    /**
     * Sets the rule's regular expression from a string or RegExp.
     *
     * A leading `^` in the source marks the rule as `matchOnlyAtLineStart` and
     * is removed before compilation; the compiled expression is always wrapped
     * as `^(?:...)`. The source is also appended to the rule name. When the
     * source is exactly a backslash followed by one bracket character, that
     * character is additionally stored on `this.string`.
     *
     * @param {Object} lexer Lexer definition passed to `compileRegExp`.
     * @param {string|RegExp} re Expression source.
     * @throws Error from `monarchCommon.createError` when `re` is neither a
     *     string nor a RegExp.
     */
    Rule.prototype.setRegex = function (lexer, re) {
        var isText = typeof re === 'string';
        if (!isText && !(re instanceof RegExp)) {
            throw monarchCommon.createError(lexer, 'rules must start with a match string or regular expression: ' + this.name);
        }
        var source = isText ? re : re.source;

        var isEscapedBracket = source.length === 2 && source[0] === '\\' && (/[\{\}\(\)\[\]]/).test(source[1]);
        if (isEscapedBracket) {
            this.string = source[1];
        }

        var anchored = source.length > 0 && source[0] === '^';
        this.matchOnlyAtLineStart = anchored;
        this.name = this.name + ': ' + source;
        var body = anchored ? source.substr(1) : source;
        this.regex = compileRegExp(lexer, '^(?:' + body + ')');
    };

    /**
     * Compiles `act` with `compileAction` and stores it as the rule's action.
     *
     * @param {Object} lexer Lexer definition.
     * @param {*} act Raw action.
     */
    Rule.prototype.setAction = function (lexer, act) {
        this.action = compileAction(lexer, this.name, act);
    };

    return Rule;
}());
|
/**
 * Compiles a JSON-style language definition into a lexer object.
 *
 * The returned lexer carries `languageId`, `start`, `ignoreCase`,
 * `tokenPostfix`, `defaultToken`, `usesEmbedded`, `maxStack`, `tokenizer`
 * (state name -> list of compiled `Rule`s) and `brackets`.
 *
 * Note: `json` itself is modified. It is used as the lookup object while rules
 * are compiled, so `languageId`, `ignoreCase`, `noThrow`, `usesEmbedded`,
 * `stateNames` and `defaultToken` are written onto it, and a default
 * `brackets` list is stored on it when none is given.
 *
 * @param {string} languageId Language identifier; also the default token
 *     postfix (prefixed with a dot).
 * @param {Object} json Language definition; must have an object `tokenizer`.
 * @returns {Object} The compiled lexer.
 * @throws {Error} When `json` is not an object, and (via
 *     `monarchCommon.createError`) when the tokenizer, a rule, an include or a
 *     bracket description is malformed.
 */
export function compile(languageId, json) {
    if (!json || typeof json !== 'object') {
        throw new Error('Monarch: expecting a language definition object');
    }

    // Create the result lexer and fill in the defaults.
    const lexer = {};
    lexer.languageId = languageId;
    lexer.noThrow = false; // errors are raised during compilation
    lexer.maxStack = 100;
    lexer.start = (typeof json.start === 'string' ? json.start : null);
    lexer.ignoreCase = bool(json.ignoreCase, false);
    lexer.tokenPostfix = string(json.tokenPostfix, '.' + lexer.languageId);
    lexer.defaultToken = string(json.defaultToken, 'source');
    lexer.usesEmbedded = false;

    // `lexerMin` is the definition object itself: rule compilation resolves
    // attribute references against it, so it needs the user's attributes
    // plus the few lexer settings copied here.
    const lexerMin = json;
    lexerMin.languageId = languageId;
    lexerMin.ignoreCase = lexer.ignoreCase;
    lexerMin.noThrow = lexer.noThrow;
    lexerMin.usesEmbedded = lexer.usesEmbedded;
    lexerMin.stateNames = json.tokenizer;
    lexerMin.defaultToken = lexer.defaultToken;

    // Compiles `rules` into `newrules`, expanding `include` entries in place.
    function addRules(state, newrules, rules) {
        for (let i = 0; i < rules.length; i++) {
            const rule = rules[i];
            let include = rule.include;
            if (include) {
                if (typeof include !== 'string') {
                    throw monarchCommon.createError(lexer, 'an \'include\' attribute must be a string at: ' + state);
                }
                if (include[0] === '@') {
                    include = include.substr(1);
                }
                if (!json.tokenizer[include]) {
                    throw monarchCommon.createError(lexer, 'include target \'' + include + '\' is not defined at: ' + state);
                }
                addRules(state + '.' + include, newrules, json.tokenizer[include]);
                continue;
            }

            const newrule = new Rule(state);
            if (Array.isArray(rule) && rule.length >= 1 && rule.length <= 3) {
                // Short form: [regex, action] or [regex, action, next].
                newrule.setRegex(lexerMin, rule[0]);
                if (rule.length >= 3) {
                    if (typeof rule[1] === 'string') {
                        newrule.setAction(lexerMin, { token: rule[1], next: rule[2] });
                    }
                    else if (typeof rule[1] === 'object') {
                        const rule1 = rule[1];
                        rule1.next = rule[2];
                        newrule.setAction(lexerMin, rule1);
                    }
                    else {
                        throw monarchCommon.createError(lexer, 'a next state as the last element of a rule can only be given if the action is either an object or a string, at: ' + state);
                    }
                }
                else {
                    newrule.setAction(lexerMin, rule[1]);
                }
            }
            else {
                // Long form: { regex, action, name?, matchOnlyAtLineStart? }.
                if (!rule.regex) {
                    throw monarchCommon.createError(lexer, 'a rule must either be an array, or an object with a \'regex\' or \'include\' field at: ' + state);
                }
                if (rule.name) {
                    if (typeof rule.name === 'string') {
                        newrule.name = rule.name;
                    }
                }
                newrule.setRegex(lexerMin, rule.regex);
                // Applied after setRegex, which recomputes the flag from a
                // leading '^' and would otherwise discard it. Both spellings
                // are accepted; the option can only switch the flag on.
                if (bool(rule.matchOnlyAtLineStart, false) || bool(rule.matchOnlyAtStart, false)) {
                    newrule.matchOnlyAtLineStart = true;
                }
                newrule.setAction(lexerMin, rule.action);
            }
            newrules.push(newrule);
        }
    }

    if (!json.tokenizer || typeof json.tokenizer !== 'object') {
        throw monarchCommon.createError(lexer, 'a language definition must define the \'tokenizer\' attribute as an object');
    }

    // NOTE(review): an array is used as a string-keyed map here; kept as-is
    // because code outside this block may depend on it.
    lexer.tokenizer = [];
    for (const key in json.tokenizer) {
        if (json.tokenizer.hasOwnProperty(key)) {
            if (!lexer.start) {
                // The first state becomes the start state unless one was given.
                lexer.start = key;
            }
            const rules = json.tokenizer[key];
            lexer.tokenizer[key] = [];
            addRules('tokenizer.' + key, lexer.tokenizer[key], rules);
        }
    }
    // Action compilation may have flagged embedded-language use on lexerMin.
    lexer.usesEmbedded = lexerMin.usesEmbedded;

    if (json.brackets) {
        if (!Array.isArray(json.brackets)) {
            throw monarchCommon.createError(lexer, 'the \'brackets\' attribute must be defined as an array');
        }
    }
    else {
        json.brackets = [
            { open: '{', close: '}', token: 'delimiter.curly' },
            { open: '[', close: ']', token: 'delimiter.square' },
            { open: '(', close: ')', token: 'delimiter.parenthesis' },
            { open: '<', close: '>', token: 'delimiter.angle' }
        ];
    }

    const brackets = [];
    for (const el of json.brackets) {
        let desc = el;
        if (Array.isArray(desc) && desc.length === 3) {
            // Array form: [open, close, token].
            desc = { token: desc[2], open: desc[0], close: desc[1] };
        }
        // Validate the shape first so that null or incomplete entries produce
        // the descriptive error instead of a TypeError or a misleading
        // "must be different: undefined".
        const wellFormed = Boolean(desc) &&
            typeof desc.open === 'string' &&
            typeof desc.token === 'string' &&
            typeof desc.close === 'string';
        if (!wellFormed) {
            throw monarchCommon.createError(lexer, 'every element in the \'brackets\' array must be a \'{open,close,token}\' object or array');
        }
        if (desc.open === desc.close) {
            throw monarchCommon.createError(lexer, 'open and close brackets in a \'brackets\' attribute must be different: ' + desc.open +
                '\n hint: use the \'bracket\' attribute if matching on equal brackets is required.');
        }
        brackets.push({
            token: desc.token + lexer.tokenPostfix,
            open: monarchCommon.fixCase(lexer, desc.open),
            close: monarchCommon.fixCase(lexer, desc.close)
        });
    }
    lexer.brackets = brackets;

    lexer.noThrow = true;
    return lexer;
}
|