1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 | import * as monarchCommon from './common';
|
12 | import { IMonarchLanguage, IMonarchLanguageBracket } from './types';
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
/**
 * Tells whether `obj` is an array in which every element satisfies `elemType`.
 *
 * @param elemType predicate invoked once per element, with the element as its only argument
 * @param obj value to inspect; anything that is not an array yields `false`
 * @returns `true` for an array (the empty array included) whose elements all pass the predicate
 */
function isArrayOf(elemType: (x: any) => boolean, obj: any): boolean {
	if (!Array.isArray(obj)) {
		// covers null/undefined/primitives as well as array-like objects
		return false;
	}
	for (let idx = 0; idx < obj.length; idx++) {
		if (!elemType(obj[idx])) {
			return false;
		}
	}
	return true;
}
|
37 |
|
/**
 * Reads an optional boolean setting.
 *
 * @param prop raw value taken from a language definition
 * @param defValue fallback used when `prop` is not a boolean
 * @returns `prop` when it is a real boolean, otherwise `defValue`
 */
function bool(prop: any, defValue: boolean): boolean {
	return typeof prop === 'boolean' ? prop : defValue;
}
|
44 |
|
/**
 * Reads an optional string setting.
 *
 * @param prop raw value taken from a language definition
 * @param defValue fallback used when `prop` is not a string
 * @returns `prop` when it is a string (the empty string included), otherwise `defValue`
 */
function string(prop: any, defValue: string): string {
	return typeof prop === 'string' ? prop : defValue;
}
|
51 |
|
52 |
|
/**
 * Converts a list of names into a lookup object whose keys are the names
 * and whose values are all `true`.
 *
 * @param array names to register; duplicates collapse into a single key
 * @returns a plain object mapping every name to `true`
 */
function arrayToHash(array: string[]): { [name: string]: true } {
	const table: any = {};
	for (let idx = 0; idx < array.length; idx++) {
		table[array[idx]] = true;
	}
	return table;
}
|
60 |
|
61 |
|
/**
 * Builds a predicate that tells whether a word belongs to a fixed keyword list.
 *
 * The keywords are kept in a `Set` rather than in a plain object, so that
 * names which collide with `Object.prototype` members (for example
 * `hasOwnProperty`, `constructor` or `__proto__`) behave like any other
 * keyword: they match only when they are actually listed, and listing them
 * cannot break the lookup itself.
 *
 * @param arr the keywords to recognise
 * @param caseInsensitive when `true`, both the keywords and the tested word
 *        are lower-cased before comparison (defaults to `false`)
 * @returns a function returning `true` exactly for words contained in `arr`
 */
function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false): (str: string) => boolean {
	if (caseInsensitive) {
		const lowered = new Set<string>(arr.map((keyword) => keyword.toLowerCase()));
		return (word) => lowered.has(word.toLowerCase());
	}
	const exact = new Set<string>(arr);
	return (word) => exact.has(word);
}
|
77 |
|
78 |
|
79 |
|
80 |
|
81 |
|
82 |
|
83 |
|
84 |
|
/**
 * Turns a regular-expression source string into a `RegExp`, expanding every
 * `@name` reference with the attribute of that name found on `lexer`.
 *
 * A string attribute is inserted as-is and a `RegExp` attribute contributes
 * its `source`; either one is wrapped in a non-capturing group unless
 * `monarchCommon.empty` reports it as empty. Because an expansion may itself
 * contain `@name` references, the substitution is repeated, at most five
 * times, for as long as the string still contains an `@`.
 *
 * @param lexer language definition the attributes are looked up on; its
 *        `ignoreCase` flag decides whether the result gets the `i` flag
 * @param str regular-expression source, possibly containing `@name` references
 * @returns the compiled regular expression
 * @throws the value produced by `monarchCommon.createError` when a referenced
 *         attribute is missing or is neither a string nor a `RegExp`
 */
function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
	const maxPasses = 5;
	for (let pass = 0; pass < maxPasses && str.indexOf('@') >= 0; pass++) {
		str = str.replace(/@(\w+)/g, (_whole: string, attr: string) => {
			const value = lexer[attr];
			let expansion: string;
			if (typeof value === 'string') {
				expansion = value;
			} else if (value && value instanceof RegExp) {
				expansion = value.source;
			} else if (value === undefined) {
				throw monarchCommon.createError(lexer, 'language definition does not contain attribute \'' + attr + '\', used at: ' + str);
			} else {
				throw monarchCommon.createError(lexer, 'attribute reference \'' + attr + '\' must be a string, used at: ' + str);
			}
			return monarchCommon.empty(expansion) ? '' : '(?:' + expansion + ')';
		});
	}

	const flags = lexer.ignoreCase ? 'i' : '';
	return new RegExp(str, flags);
}
|
108 |
|
109 |
|
110 |
|
111 |
|
112 |
|
113 |
|
/**
 * Picks the string a guard is tested against.
 *
 * - a negative `num` selects `id`;
 * - a `num` below `matches.length` selects `matches[num]`;
 * - a `num` of 100 or more selects from the state: `num - 100` indexes the
 *   list made of the whole `state` followed by its dot-separated parts.
 *
 * @param id value returned for a negative `num`
 * @param matches regular-expression match groups
 * @param state state name, possibly dotted (for example `tag.script`)
 * @param num selector as described above
 * @returns the selected string, or `null` when `num` selects nothing
 */
function selectScrutinee(id: string, matches: string[], state: string, num: number): string | null {
	if (num < 0) {
		return id;
	}
	if (num < matches.length) {
		return matches[num];
	}
	if (num >= 100) {
		const stateIndex = num - 100;
		// index 0 is the full state name, 1.. are its individual parts
		const candidates = [state].concat(state.split('.'));
		if (stateIndex < candidates.length) {
			return candidates[stateIndex];
		}
	}
	return null;
}
|
131 |
|
/**
 * Compiles one key of a `cases` object into a branch: a named test plus the
 * action to use when the test succeeds.
 *
 * The key has the shape `[$scrutinee][operator]pattern`:
 *
 * - scrutinee: `$#` (the default) tests the `id` handed to the branch,
 *   `$n` selects entry `n` of the matches, and `$Sn` / `$sn` selects from
 *   the state name (see `selectScrutinee`);
 * - operator: one of `@`, `!@`, `~`, `!~`, `==`, `!=`. Without an explicit
 *   operator, an empty pattern means "scrutinee is not the empty string", a
 *   pattern made of word characters only means equality, and anything else
 *   is treated as a regular expression (`~`);
 * - pattern: a word list attribute name (`@` / `!@`), a regular expression
 *   (`~` / `!~`) or a literal (`==` / `!=`). Patterns containing `$` are
 *   expanded with `monarchCommon.substituteMatches` at test time.
 *
 * @param lexer language definition used to resolve attributes and case handling
 * @param ruleName name of the enclosing rule, used in error messages only
 * @param tkey the guard text (the key in the `cases` object)
 * @param val the already compiled action belonging to this guard
 * @returns the branch `{ name, value, test }`
 * @throws the value produced by `monarchCommon.createError` when an `@` / `!@`
 *         target is not defined or is not an array of strings
 */
function createGuard(lexer: monarchCommon.ILexerMin, ruleName: string, tkey: string, val: monarchCommon.FuzzyAction): monarchCommon.IBranch {
	// Split off the scrutinee selector; -1 means "test the id itself".
	let scrut = -1;
	let oppat = tkey;
	let matches = tkey.match(/^\$(([sS]?)(\d\d?)|#)(.*)$/);
	if (matches) {
		if (matches[3]) {
			scrut = parseInt(matches[3], 10);
			if (matches[2]) {
				// state selectors live in the 100+ range, see selectScrutinee
				scrut = scrut + 100;
			}
		}
		oppat = matches[4];
	}

	// Determine operator and pattern.
	let op = '~';
	let pat = oppat;
	if (!oppat || oppat.length === 0) {
		op = '!=';
		pat = '';
	}
	else if (/^\w*$/.test(pat)) {
		op = '==';
	}
	else {
		matches = oppat.match(/^(@|!@|~|!~|==|!=)(.*)$/);
		if (matches) {
			op = matches[1];
			pat = matches[2];
		}
	}

	// Build the tester for the chosen operator.
	let tester: (s: string, id: string, matches: string[], state: string, eos: boolean) => boolean;

	if ((op === '~' || op === '!~') && /^(\w|\|)*$/.test(pat)) {
		// A plain alternation of words needs no regular expression at all.
		const inWords = createKeywordMatcher(pat.split('|'), lexer.ignoreCase);
		tester = function (s) { return (op === '~' ? inWords(s) : !inWords(s)); };
	}
	else if (op === '@' || op === '!@') {
		const words = lexer[pat];
		if (!words) {
			throw monarchCommon.createError(lexer, 'the @ match target \'' + pat + '\' is not defined, in rule: ' + ruleName);
		}
		if (!(isArrayOf(function (elem) { return (typeof (elem) === 'string'); }, words))) {
			throw monarchCommon.createError(lexer, 'the @ match target \'' + pat + '\' must be an array of strings, in rule: ' + ruleName);
		}
		const inWords = createKeywordMatcher(words, lexer.ignoreCase);
		tester = function (s) { return (op === '@' ? inWords(s) : !inWords(s)); };
	}
	else if (op === '~' || op === '!~') {
		if (pat.indexOf('$') < 0) {
			// Static pattern: compile the regular expression once.
			const re = compileRegExp(lexer, '^' + pat + '$');
			tester = function (s) { return (op === '~' ? re.test(s) : !re.test(s)); };
		}
		else {
			// Dynamic pattern: expand the `$` references on every test.
			tester = function (s, id, matches, state) {
				const re = compileRegExp(lexer, '^' + monarchCommon.substituteMatches(lexer, pat, id, matches, state) + '$');
				// `!~` must negate here exactly as it does for static patterns
				return (op === '~' ? re.test(s) : !re.test(s));
			};
		}
	}
	else { // '==' or '!='
		const patx = monarchCommon.fixCase(lexer, pat);
		if (pat.indexOf('$') < 0) {
			tester = function (s) { return (op === '==' ? s === patx : s !== patx); };
		}
		else {
			tester = function (s, id, matches, state) {
				const patexp = monarchCommon.substituteMatches(lexer, patx, id, matches, state);
				return (op === '==' ? s === patexp : s !== patexp);
			};
		}
	}

	// Wrap the tester so that it receives the selected scrutinee.
	if (scrut === -1) {
		return {
			name: tkey, value: val, test: function (id, matches, state, eos) {
				return tester(id, id, matches, state, eos);
			}
		};
	}
	else {
		return {
			name: tkey, value: val, test: function (id, matches, state, eos) {
				const scrutinee = selectScrutinee(id, matches, state, scrut);
				// a selector that picks nothing is tested as the empty string
				return tester(!scrutinee ? '' : scrutinee, id, matches, state, eos);
			}
		};
	}
}
|
227 |
|
228 |
|
229 |
|
230 |
|
231 |
|
232 |
|
233 |
|
234 |
|
235 |
|
/**
 * Normalises the action part of a rule into its compiled form.
 *
 * Accepted shapes, checked in this order:
 * - a falsy value: compiled to `{ token: '' }`;
 * - a string: returned unchanged;
 * - an object with a `token` attribute: compiled to a fresh action object
 *   carrying the recognised attributes (`bracket`, `next`, `goBack`,
 *   `switchTo`, `log`, `_push`, `_pop`, `mark`, `fn`, `nextEmbedded`);
 * - an array: every element is compiled recursively into `{ group: [...] }`;
 * - an object with a `cases` attribute: every key becomes a guarded branch
 *   and the result is an object whose `test` function returns the value of
 *   the first branch that matches, or `lexer.defaultToken` if none does.
 *
 * Side effect: `lexer.usesEmbedded` is set to `true` as soon as an action
 * with a string `nextEmbedded` is compiled.
 *
 * @param lexer language definition, used for validation and error reporting
 * @param ruleName name of the rule being compiled, used in error messages
 * @param action the raw action taken from the language definition
 * @returns the compiled action
 * @throws the value produced by `monarchCommon.createError` for a malformed action
 */
function compileAction(lexer: monarchCommon.ILexerMin, ruleName: string, action: any): monarchCommon.FuzzyAction {
	if (!action) {
		return { token: '' };
	}

	if (typeof action === 'string') {
		return action;
	}

	if (action.token || action.token === '') {
		if (typeof action.token !== 'string') {
			throw monarchCommon.createError(lexer, 'a \'token\' attribute must be of type string, in rule: ' + ruleName);
		}

		// Only the attributes recognised below are copied over.
		const compiled: monarchCommon.IAction = { token: action.token };
		if (action.token.indexOf('$') >= 0) {
			compiled.tokenSubst = true;
		}

		if (typeof action.bracket === 'string') {
			switch (action.bracket) {
				case '@open':
					compiled.bracket = monarchCommon.MonarchBracket.Open;
					break;
				case '@close':
					compiled.bracket = monarchCommon.MonarchBracket.Close;
					break;
				default:
					throw monarchCommon.createError(lexer, 'a \'bracket\' attribute must be either \'@open\' or \'@close\', in rule: ' + ruleName);
			}
		}

		if (action.next) {
			if (typeof action.next !== 'string') {
				throw monarchCommon.createError(lexer, 'the next state must be a string value in rule: ' + ruleName);
			}
			let target: string = action.next;
			const isStackCommand = /^(@pop|@push|@popall)$/.test(target);
			if (!isStackCommand) {
				if (target[0] === '@') {
					// state names are stored without the leading '@'
					target = target.slice(1);
				}
				// a target containing '$' is only known at tokenization time, so it is not checked here
				const isStatic = target.indexOf('$') < 0;
				if (isStatic && !monarchCommon.stateExists(lexer, monarchCommon.substituteMatches(lexer, target, '', [], ''))) {
					throw monarchCommon.createError(lexer, 'the next state \'' + action.next + '\' is not defined in rule: ' + ruleName);
				}
			}
			compiled.next = target;
		}

		if (typeof action.goBack === 'number') {
			compiled.goBack = action.goBack;
		}
		// String-valued attributes that are copied verbatim, in this order.
		for (const attr of ['switchTo', 'log', '_push', '_pop', 'mark', 'fn']) {
			if (typeof action[attr] === 'string') {
				(compiled as any)[attr] = action[attr];
			}
		}
		if (typeof action.nextEmbedded === 'string') {
			compiled.nextEmbedded = action.nextEmbedded;
			lexer.usesEmbedded = true;
		}
		return compiled;
	}

	if (Array.isArray(action)) {
		const group: monarchCommon.FuzzyAction[] = [];
		action.forEach((member: any, position: number) => {
			group[position] = compileAction(lexer, ruleName, member);
		});
		return { group: group };
	}

	if (action.cases) {
		// Compile every guard, keeping the order in which the keys are listed.
		const branches: monarchCommon.IBranch[] = [];
		for (const guard of Object.keys(action.cases)) {
			const value = compileAction(lexer, ruleName, action.cases[guard]);

			if (guard === '@default' || guard === '@' || guard === '') {
				// unconditional branch
				branches.push({ test: undefined, value: value, name: guard });
			}
			else if (guard === '@eos') {
				branches.push({ test: function (id, matches, state, eos) { return eos; }, value: value, name: guard });
			}
			else {
				branches.push(createGuard(lexer, ruleName, guard, value));
			}
		}

		const fallback = lexer.defaultToken;
		return {
			test: function (id, matches, state, eos) {
				const hit = branches.find((branch) => !branch.test || branch.test(id, matches, state, eos));
				return hit ? hit.value : fallback;
			}
		};
	}

	throw monarchCommon.createError(lexer, 'an action must be a string, an object with a \'token\' or \'cases\' attribute, or an array of actions; in rule: ' + ruleName);
}
|
357 |
|
358 |
|
359 |
|
360 |
|
/**
 * A single compiled tokenizer rule: the regular expression to match and the
 * action to perform on a match.
 */
class Rule implements monarchCommon.IRule {
	/** Compiled regular expression; see `setRegex`. */
	public regex: RegExp = new RegExp('');
	/** Compiled action; see `setAction`. */
	public action: monarchCommon.FuzzyAction = { token: '' };
	/** `true` when the rule's source expression starts with `^`. */
	public matchOnlyAtLineStart: boolean = false;
	/** Rule name; `setRegex` appends the source expression to it. */
	public name: string = '';
	/** Counters, initialised to zero by the constructor. */
	public stats: any;
	/** The bracket character, when the expression is exactly one escaped bracket such as `\{`. */
	public string?: string;

	/**
	 * @param name initial name of the rule
	 */
	constructor(name: string) {
		this.name = name;
		this.stats = { time: 0, count: 0, hits: 0 };
	}

	/**
	 * Compiles the rule's matcher.
	 *
	 * Records the bracket character for single escaped-bracket expressions,
	 * notes whether the expression is anchored with `^`, appends the source
	 * to the rule name and stores the compiled expression wrapped as
	 * `^(?:...)` (with a leading `^` of the source stripped first).
	 *
	 * @param lexer language definition used to expand `@name` references
	 * @param re the source expression, as a string or a `RegExp`
	 * @throws the value produced by `monarchCommon.createError` when `re` is
	 *         neither a string nor a `RegExp`
	 */
	public setRegex(lexer: monarchCommon.ILexerMin, re: string | RegExp): void {
		let source: string;
		if (typeof re === 'string') {
			source = re;
		} else if (re instanceof RegExp) {
			source = re.source;
		} else {
			throw monarchCommon.createError(lexer, 'rules must start with a match string or regular expression: ' + this.name);
		}

		const isEscapedBracket = source.length === 2
			&& source.charAt(0) === '\\'
			&& (/[\{\}\(\)\[\]]/).test(source[1]);
		if (isEscapedBracket) {
			this.string = source[1];
		}

		const anchored = source.charAt(0) === '^';
		this.matchOnlyAtLineStart = anchored;
		this.name = this.name + ': ' + source;
		const body = anchored ? source.substr(1) : source;
		this.regex = compileRegExp(lexer, '^(?:' + body + ')');
	}

	/**
	 * Compiles and stores the rule's action.
	 *
	 * @param lexer language definition used for validation
	 * @param act the raw action from the language definition
	 */
	public setAction(lexer: monarchCommon.ILexerMin, act: monarchCommon.IAction) {
		this.action = compileAction(lexer, this.name, act);
	}
}
|
400 |
|
401 |
|
402 |
|
403 |
|
404 |
|
405 |
|
406 |
|
407 |
|
408 |
|
409 |
|
/**
 * Compiles a JSON language definition into a lexer.
 *
 * The function validates the definition, fills in defaults (`maxStack` 100,
 * `defaultToken` 'source', `tokenPostfix` '.' + languageId, and a default
 * set of curly/square/parenthesis/angle brackets), compiles every state of
 * `json.tokenizer` into a list of rules (expanding `include` entries in
 * place) and normalises the `brackets` attribute.
 *
 * Note that `json` itself is annotated during compilation: it doubles as the
 * minimal lexer handed to the rule compilers, so `languageId`, `ignoreCase`,
 * `noThrow`, `usesEmbedded`, `stateNames` and `defaultToken` are written to
 * it, and a missing `brackets` attribute is replaced by the defaults.
 *
 * @param languageId identifier stored on the lexer and used for the default token postfix
 * @param json the language definition
 * @returns the compiled lexer, with `noThrow` switched on
 * @throws Error when `json` is not an object, and the value produced by
 *         `monarchCommon.createError` for any other malformed part of the definition
 */
export function compile(languageId: string, json: IMonarchLanguage): monarchCommon.ILexer {
	if (!json || typeof (json) !== 'object') {
		throw new Error('Monarch: expecting a language definition object');
	}

	// Create the lexer; errors are thrown while compiling (noThrow is enabled at the very end).
	const lexer: monarchCommon.ILexer = <monarchCommon.ILexer>{};
	lexer.languageId = languageId;
	lexer.noThrow = false;
	lexer.maxStack = 100;

	// Basic settings, with defaults.
	lexer.start = (typeof json.start === 'string' ? json.start : null);
	lexer.ignoreCase = bool(json.ignoreCase, false);

	lexer.tokenPostfix = string(json.tokenPostfix, '.' + lexer.languageId);
	lexer.defaultToken = string(json.defaultToken, 'source');

	lexer.usesEmbedded = false;

	// The definition itself serves as the minimal lexer, so that attribute
	// references can be resolved against it while compiling.
	const lexerMin: monarchCommon.ILexerMin = <any>json;
	lexerMin.languageId = languageId;
	lexerMin.ignoreCase = lexer.ignoreCase;
	lexerMin.noThrow = lexer.noThrow;
	lexerMin.usesEmbedded = lexer.usesEmbedded;
	lexerMin.stateNames = json.tokenizer;
	lexerMin.defaultToken = lexer.defaultToken;

	// Compiles `rules` and appends the result to `newrules`; `state` is the
	// dotted path used for rule names and error messages.
	function addRules(state: string, newrules: monarchCommon.IRule[], rules: any[]) {
		for (const rule of rules) {

			let include = rule.include;
			if (include) {
				if (typeof (include) !== 'string') {
					throw monarchCommon.createError(lexer, 'an \'include\' attribute must be a string at: ' + state);
				}
				if (include[0] === '@') {
					include = include.substr(1);
				}
				if (!json.tokenizer[include]) {
					throw monarchCommon.createError(lexer, 'include target \'' + include + '\' is not defined at: ' + state);
				}
				addRules(state + '.' + include, newrules, json.tokenizer[include]);
			}
			else {
				const newrule = new Rule(state);

				// Array form: [regex, action] or [regex, action, next]
				if (Array.isArray(rule) && rule.length >= 1 && rule.length <= 3) {
					newrule.setRegex(lexerMin, rule[0]);
					if (rule.length >= 3) {
						if (typeof (rule[1]) === 'string') {
							newrule.setAction(lexerMin, { token: rule[1], next: rule[2] });
						}
						else if (typeof (rule[1]) === 'object') {
							const rule1 = rule[1];
							rule1.next = rule[2];
							newrule.setAction(lexerMin, rule1);
						}
						else {
							throw monarchCommon.createError(lexer, 'a next state as the last element of a rule can only be given if the action is either an object or a string, at: ' + state);
						}
					}
					else {
						newrule.setAction(lexerMin, rule[1]);
					}
				}
				// Object form: { regex, action, name?, matchOnlyAtLineStart? }
				else {
					if (!rule.regex) {
						throw monarchCommon.createError(lexer, 'a rule must either be an array, or an object with a \'regex\' or \'include\' field at: ' + state);
					}
					if (rule.name) {
						if (typeof rule.name === 'string') {
							newrule.name = rule.name;
						}
					}
					newrule.setRegex(lexerMin, rule.regex);
					// setRegex derives the flag from a leading '^'; an explicit
					// request on the rule is applied afterwards so that it is
					// not overwritten.
					if (bool(rule.matchOnlyAtLineStart, false)) {
						newrule.matchOnlyAtLineStart = true;
					}
					newrule.setAction(lexerMin, rule.action);
				}

				newrules.push(newrule);
			}
		}
	}

	// Compile the tokenizer states.
	if (!json.tokenizer || typeof (json.tokenizer) !== 'object') {
		throw monarchCommon.createError(lexer, 'a language definition must define the \'tokenizer\' attribute as an object');
	}

	lexer.tokenizer = <any>[];
	for (const key in json.tokenizer) {
		if (json.tokenizer.hasOwnProperty(key)) {
			if (!lexer.start) {
				// without an explicit start state, the first listed state is used
				lexer.start = key;
			}

			const rules = json.tokenizer[key];
			lexer.tokenizer[key] = [];
			addRules('tokenizer.' + key, lexer.tokenizer[key], rules);
		}
	}
	// compileAction may have flagged the use of embedded languages on lexerMin
	lexer.usesEmbedded = lexerMin.usesEmbedded;

	// Validate the brackets, or install the defaults.
	if (json.brackets) {
		if (!(Array.isArray(<any>json.brackets))) {
			throw monarchCommon.createError(lexer, 'the \'brackets\' attribute must be defined as an array');
		}
	}
	else {
		json.brackets = [
			{ open: '{', close: '}', token: 'delimiter.curly' },
			{ open: '[', close: ']', token: 'delimiter.square' },
			{ open: '(', close: ')', token: 'delimiter.parenthesis' },
			{ open: '<', close: '>', token: 'delimiter.angle' }];
	}
	const brackets: IMonarchLanguageBracket[] = [];
	for (const el of json.brackets) {
		let desc: any = el;
		if (desc && Array.isArray(desc) && desc.length === 3) {
			// array form: [open, close, token]
			desc = { token: desc[2], open: desc[0], close: desc[1] };
		}
		if (!desc) {
			// report a missing entry as a definition error instead of failing on a property access
			throw monarchCommon.createError(lexer, 'every element in the \'brackets\' array must be a \'{open,close,token}\' object or array');
		}
		if (desc.open === desc.close) {
			throw monarchCommon.createError(lexer, 'open and close brackets in a \'brackets\' attribute must be different: ' + desc.open +
				'\n hint: use the \'bracket\' attribute if matching on equal brackets is required.');
		}
		if (typeof desc.open === 'string' && typeof desc.token === 'string' && typeof desc.close === 'string') {
			brackets.push({
				token: desc.token + lexer.tokenPostfix,
				open: monarchCommon.fixCase(lexer, desc.open),
				close: monarchCommon.fixCase(lexer, desc.close)
			});
		}
		else {
			throw monarchCommon.createError(lexer, 'every element in the \'brackets\' array must be a \'{open,close,token}\' object or array');
		}
	}
	lexer.brackets = brackets;

	// Compilation succeeded: disable throwing from here on.
	lexer.noThrow = true;
	return lexer;
}
|