UNPKG

20.5 kBPlain TextView Raw
1/*---------------------------------------------------------------------------------------------
2 * Copyright (c) Microsoft Corporation. All rights reserved.
3 * Licensed under the MIT License. See License.txt in the project root for license information.
4 *--------------------------------------------------------------------------------------------*/
5
6/**
7 * Create a syntax highlighter with a fully declarative JSON style lexer description
8 * using regular expressions.
9 */
10
11import { Token, TokenizationResult } from './token';
12import { IState } from './types';
13import * as monarchCommon from './common';
14
/**
 * Contract of a line-by-line tokenizer: it supplies a starting state and
 * turns one line plus an incoming state into a tokenization result.
 */
export interface ITokenizationSupport {

    /** Returns the state that tokenization starts from. */
    getInitialState(): IState;

    /**
     * Tokenizes `line`, continuing from `state`.
     * `offsetDelta` is added to each of the returned indices.
     */
    tokenize(line: string, state: IState, offsetDelta: number): TokenizationResult;
}
22
/**
 * Depth threshold handed to the stack-element and line-state factories:
 * stacks at or beyond this depth are created fresh instead of being cached.
 */
const CACHE_STACK_DEPTH = 10;
24
/**
 * Returns the dot-separated segment of `state` found at position `index`.
 *
 * At runtime the result is `undefined` when `state` has fewer than
 * `index + 1` segments, even though the declared return type is `string`.
 */
function statePart(state: string, index: number): string {
    const segments = state.split('.');
    return segments[index];
}
28
29
/**
 * Creates stack elements and hands back the same instance for the same
 * (parent stack, state) combination as long as the parent is shallower than
 * the configured cache depth.
 */
class MonarchStackElementFactory {

    private static readonly _INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);

    /** Creates (or reuses) a stack element through the shared factory instance. */
    public static create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
        return this._INSTANCE.create(parent, state);
    }

    private readonly _maxCacheDepth: number;
    private readonly _entries: { [stackElementId: string]: MonarchStackElement; };

    constructor(maxCacheDepth: number) {
        this._maxCacheDepth = maxCacheDepth;
        // prototype-less object so state names never collide with inherited keys
        this._entries = Object.create(null);
    }

    /**
     * Returns a stack element for `state` on top of `parent`.
     * Elements whose parent depth is at or above the cache depth are always new objects.
     */
    public create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
        const beyondCache = parent !== null && parent.depth >= this._maxCacheDepth;
        if (beyondCache) {
            // no caching above a certain depth
            return new MonarchStackElement(parent, state);
        }

        // cache key: the parent's id followed by the new state, '|'-separated
        const parentId = MonarchStackElement.getStackElementId(parent);
        const cacheKey = parentId.length > 0 ? parentId + '|' + state : state;

        const cached = this._entries[cacheKey];
        if (cached) {
            return cached;
        }

        const created = new MonarchStackElement(parent, state);
        this._entries[cacheKey] = created;
        return created;
    }
}
68
/**
 * One immutable frame of the tokenizer's state stack: a state name plus a
 * link to the frame below it. The state name is read as dot-separated
 * segments (see `part`).
 */
class MonarchStackElement {

    public readonly parent: MonarchStackElement | null;
    public readonly state: string;
    /** Number of frames from this one down to the root; the root has depth 1. */
    public readonly depth: number;

    constructor(parent: MonarchStackElement | null, state: string) {
        this.parent = parent;
        this.state = state;
        this.depth = parent ? parent.depth + 1 : 1;
    }

    /**
     * Builds a string id for a stack by joining the state names with '|',
     * starting at `element` and walking towards the root.
     * Returns '' for `null`.
     */
    public static getStackElementId(element: MonarchStackElement | null): string {
        let id = '';
        for (let cursor = element; cursor !== null; cursor = cursor.parent) {
            id = id.length > 0 ? id + '|' + cursor.state : cursor.state;
        }
        return id;
    }

    /**
     * Structural comparison of two stacks: same state names all the way
     * down and the same length. Stops early once both chains share a frame.
     */
    private static _equals(a: MonarchStackElement | null, b: MonarchStackElement | null): boolean {
        let left = a;
        let right = b;
        while (left !== null && right !== null) {
            if (left === right) {
                return true;
            }
            if (left.state !== right.state) {
                return false;
            }
            left = left.parent;
            right = right.parent;
        }
        // equal only if both chains ran out at the same time
        return left === null && right === null;
    }

    /** Distance between the first and the last tab character in the state name (0 when there is none). */
    public get indent(): number {
        const firstTab = this.state.indexOf('\t');
        const lastTab = this.state.lastIndexOf('\t');
        return lastTab - firstTab;
    }

    /** Third dot-separated segment of the state name. */
    public get scope(): string {
        return this.part(2);
    }

    /**
     * Third dot-separated segment of the state name.
     * NOTE(review): this reads the same segment as `scope` - confirm that is intended.
     */
    public get detail(): string {
        return this.part(2);
    }

    /** Returns the dot-separated segment of the state name at `index` (`undefined` at runtime when absent). */
    public part(index: number): string {
        const segments = this.state.split('.');
        return segments[index];
    }

    /** True when `other` describes the same stack of state names. */
    public equals(other: MonarchStackElement): boolean {
        return MonarchStackElement._equals(this, other);
    }

    /** Returns a stack with `state` pushed on top of this frame. */
    public push(state: string): MonarchStackElement {
        return MonarchStackElementFactory.create(this, state);
    }

    /** Returns the frame below this one, or `null` for the root. */
    public pop(): MonarchStackElement | null {
        return this.parent;
    }

    /** Returns the root frame of this stack. */
    public popall(): MonarchStackElement {
        let root: MonarchStackElement = this;
        for (let below = root.parent; below; below = root.parent) {
            root = below;
        }
        return root;
    }

    /** Returns a stack where this frame is replaced by one for `state`. */
    public switchTo(state: string): MonarchStackElement {
        return MonarchStackElementFactory.create(this.parent, state);
    }
}
150
/**
 * Creates line states and hands back the same instance for the same stack
 * as long as the stack is shallower than the configured cache depth.
 */
class MonarchLineStateFactory {

    private static readonly _INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);

    /** Creates (or reuses) a line state through the shared factory instance. */
    public static create(stack: MonarchStackElement): MonarchLineState {
        return this._INSTANCE.create(stack);
    }

    private readonly _maxCacheDepth: number;
    private readonly _entries: { [stackElementId: string]: MonarchLineState; };

    constructor(maxCacheDepth: number) {
        this._maxCacheDepth = maxCacheDepth;
        // prototype-less object so stack ids never collide with inherited keys
        this._entries = Object.create(null);
    }

    /**
     * Returns a line state wrapping `stack`.
     * Stacks at or above the cache depth always get a new object.
     */
    public create(stack: MonarchStackElement): MonarchLineState {
        const beyondCache = stack !== null && stack.depth >= this._maxCacheDepth;
        if (beyondCache) {
            // no caching above a certain depth
            return new MonarchLineState(stack);
        }

        const cacheKey = MonarchStackElement.getStackElementId(stack);
        const cached = this._entries[cacheKey];
        if (cached) {
            return cached;
        }

        const created = new MonarchLineState(stack);
        this._entries[cacheKey] = created;
        return created;
    }
}
185
/**
 * Tokenizer state carried from the end of one line to the start of the
 * next; it consists solely of the state stack.
 */
class MonarchLineState implements IState {

    public readonly stack: MonarchStackElement;

    constructor(stack: MonarchStackElement) {
        this.stack = stack;
    }

    /** Returns a line state for the same stack, obtained through the line-state factory. */
    public clone(): IState {
        return MonarchLineStateFactory.create(this.stack);
    }

    /** True when `other` is a `MonarchLineState` whose stack equals this one's. */
    public equals(other: IState): boolean {
        return other instanceof MonarchLineState && this.stack.equals(other.stack);
    }
}
210
/**
 * Sink for the tokens produced while tokenizing one line.
 */
interface IMonarchTokensCollector {

    /** Announces the mode (language) id that subsequently emitted tokens belong to. */
    enterMode(startOffset: number, modeId: string): void;

    /**
     * Records a token of `type` starting at `startOffset` and returns it.
     * `stack` is the tokenizer stack at the time of emission, when the caller supplies it.
     */
    emit(startOffset: number, type: string, stack?: MonarchStackElement): Token;
}
215
/**
 * Collects the tokens emitted for one line into a flat list.
 *
 * Every `emit` call appends a new token; consecutive tokens of the same
 * type are not merged.
 */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

    private readonly _tokens: Token[];
    private _language: string | null;

    constructor() {
        this._tokens = [];
        this._language = null;
    }

    /** Remembers `modeId` as the language of the tokens emitted from now on. */
    public enterMode(startOffset: number, modeId: string): void {
        this._language = modeId;
    }

    /**
     * Appends a token of `type` at `startOffset` and returns it.
     * `enterMode` is expected to have been called first; the language is
     * passed on as-is otherwise.
     */
    public emit(startOffset: number, type: string, stack?: MonarchStackElement): Token {
        const token = new Token(startOffset, type, this._language!);
        this._tokens.push(token);
        return token;
    }

    /** Wraps the collected tokens and `endState` into a tokenization result. */
    public finalize(endState: MonarchLineState): TokenizationResult {
        return new TokenizationResult(this._tokens, endState);
    }
}
251
/**
 * Load status of a tokenizer: either loaded, or not loaded together with
 * a promise.
 */
export type ILoadStatus =
    | { loaded: true; }
    | { loaded: false; promise: Promise<void>; };
253
/**
 * Line tokenizer driven by a Monarch lexer description.
 *
 * Each `tokenize` call runs the lexer rules of the current state against one
 * line, emits the resulting tokens into a collector and returns them together
 * with the state to continue from on the next line.
 */
export class MonarchTokenizer implements ITokenizationSupport {

    private readonly _modeId: string;
    private readonly _lexer: monarchCommon.ILexer;
    /** When true, `rule.stats` (count, time, hits) is updated for every regex rule that is tried. */
    public _profile: boolean;

    constructor(modeId: string, lexer: monarchCommon.ILexer) {
        this._modeId = modeId;
        this._lexer = lexer;
        this._profile = false;
    }

    /** No-op. */
    public dispose(): void {

    }

    /** Always reports the tokenizer as loaded. */
    public getLoadStatus(): ILoadStatus {
        return { loaded: true };
    }

    /** Returns the line state whose stack consists of the lexer's start state only. */
    public getInitialState(): IState {
        let rootState = MonarchStackElementFactory.create(null, this._lexer.start!);
        return MonarchLineStateFactory.create(rootState);
    }

    /**
     * Tokenizes `line`, continuing from `lineState`.
     * `offsetDelta` is added to the offset of every emitted token.
     */
    public tokenize(line: string, lineState: IState, offsetDelta: number): TokenizationResult {
        let tokensCollector = new MonarchClassicTokensCollector();
        let endLineState = this._tokenize(line, <MonarchLineState>lineState, offsetDelta, tokensCollector);
        return tokensCollector.finalize(endLineState);
    }

    private _tokenize(line: string, lineState: MonarchLineState, offsetDelta: number, collector: IMonarchTokensCollector): MonarchLineState {
        return this._myTokenize(line, lineState, offsetDelta, collector);
    }

    /** Returns the name of `rule` for use in error messages, or '(unknown)' when there is no rule. */
    private _safeRuleName(rule: monarchCommon.IRule | null): string {
        if (rule) {
            return rule.name;
        }
        return '(unknown)';
    }

    /**
     * Appends marker token names to `tokens` that describe the move from the
     * dash-separated scope path `from` to the scope path `to`:
     * `pop.<segment>.<level>` for every segment of `from` that is left, then
     * `push.<segment>.<level>` for every segment of `to` that is entered.
     * When `toState` is given, its second dot-separated part is appended to
     * each push marker.
     */
    private _rescope(from: string, to: string, tokens: string[], toState: string): void {
        let a = (from || '').split('-'); // if-body
        let b = (to || '').split('-'); // if

        if (from === to) return;

        // find out their common base; the first segment is always treated as shared
        let diff = 1;
        while (a[diff] && a[diff] === b[diff]) {
            diff++;
        }

        // leave the segments of `from` beyond the common base, innermost first
        let level = a.length;
        while (level > diff) {
            level--;
            tokens.push('pop.' + a[level] + '.' + level);
        }

        // enter the segments of `to` beyond the common base, outermost first
        while (b.length > diff) {
            const pushLevel = diff;
            diff++;
            let id = 'push.' + b[pushLevel] + '.' + pushLevel;
            if (toState) {
                let indent = statePart(toState, 1);
                id += '.' + indent;
            }
            tokens.push(id);
        }
    }

    /**
     * Core tokenizer loop for a single line.
     *
     * Repeatedly matches the rules of the state on top of the stack against
     * the rest of the line, applies the matched action (token, state
     * transitions, group actions, `@rematch`, `@brackets`) and emits tokens
     * into `tokensCollector`.
     *
     * Throws when a referenced state is undefined, when the stack would
     * overflow or underflow, when a group rule is malformed, or when the
     * lexer makes no progress.
     *
     * @returns the line state to continue from on the next line
     */
    private _myTokenize(line: string, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
        tokensCollector.enterMode(offsetDelta, this._modeId);

        const lineLength = line.length;

        let stack = lineState.stack;
        let lastToken: any = null;
        let pos = 0;
        let profile = this._profile;

        // regular expression group matching
        // these never need cloning or equality since they are only used within a line match
        interface GroupMatching {
            matches: string[];
            rule: monarchCommon.IRule | null;
            groups: { action: monarchCommon.FuzzyAction; matched: string; }[];
        }
        let groupMatching: GroupMatching | null = null;

        // See https://github.com/Microsoft/monaco-editor/issues/1235:
        // Evaluate rules at least once for an empty line
        let forceEvaluation = true;
        // marker tokens queued by _rescope, emitted after the token that caused them
        let append: string[] = [];
        let tries = 0;
        let rules: monarchCommon.IRule[] | null = [];
        let hangPos = -1;

        while (forceEvaluation || pos < lineLength) {
            tries++;

            // hang guard: if 1000 iterations pass twice in a row without `pos` moving, give up
            if (tries > 1000) {

                if (pos === hangPos) {
                    console.log('infinite recursion', pos, lineLength, stack, tokensCollector);
                    throw new Error('infinite recursion in tokenizer?');
                } else {
                    hangPos = pos;
                    tries = 0;
                }

            }

            const pos0 = pos;
            const stackLen0 = stack.depth;
            const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
            const state = stack.state;

            let matches: string[] | null = null;
            let matched: string | null = null;
            let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
            let rule: monarchCommon.IRule | null = null;

            // check if we need to process group matches first
            if (groupMatching) {
                matches = groupMatching.matches;
                const groupEntry = groupMatching.groups.shift()!;
                matched = groupEntry.matched;
                action = groupEntry.action;
                rule = groupMatching.rule;

                // cleanup if necessary
                if (groupMatching.groups.length === 0) {
                    groupMatching = null;
                }
            } else {
                // otherwise we match on the token stream

                if (!forceEvaluation && pos >= lineLength) {
                    // nothing to do
                    break;
                }

                forceEvaluation = false;

                // get the rules for this state
                rules = this._lexer.tokenizer[state];
                if (!rules) {
                    rules = monarchCommon.findRules(this._lexer, state); // do parent matching
                    if (!rules) {
                        throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
                    }
                }

                // try each rule until we match
                let restOfLine = line.substr(pos);
                for (const candidate of rules) {
                    if (candidate.string !== undefined) {
                        // literal rule: compared against the next character only
                        if (restOfLine[0] === candidate.string) {
                            matches = [candidate.string];
                            matched = candidate.string;
                            action = candidate.action;
                            // remember the rule so error messages and group matching can name it
                            rule = candidate;
                            break;
                        }
                    }
                    else if (pos === 0 || !candidate.matchOnlyAtLineStart) {
                        if (profile) {
                            candidate.stats.count++;
                            let now = performance.now();
                            matches = restOfLine.match(candidate.regex);
                            candidate.stats.time += (performance.now() - now);
                            if (matches) {
                                candidate.stats.hits++;
                            }
                        } else {
                            matches = restOfLine.match(candidate.regex);
                        }
                        if (matches) {
                            matched = matches[0];
                            action = candidate.action;
                            // remember the rule so error messages and group matching can name it
                            rule = candidate;
                            break;
                        }
                    }
                }
            }

            // We matched 'rule' with 'matches' and 'action'
            if (!matches) {
                matches = [''];
                matched = '';
            }

            if (!action) {
                // bad: we didn't match anything, and there is no action to take
                // we need to advance the stream or we get progress trouble
                if (pos < lineLength) {
                    matches = [line.charAt(pos)];
                    matched = matches[0];
                }
                action = this._lexer.defaultToken;
            }

            if (matched === null) {
                // should never happen, needed for strict null checking
                break;
            }

            // advance stream
            pos += matched.length;

            // maybe call action function (used for 'cases')
            while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
                action = action.test(matched, matches, state, pos === lineLength);
            }

            let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
            // set the result: either a string or an array of actions
            if (typeof action === 'string' || Array.isArray(action)) {
                result = action;
            } else if (action.group) {
                result = action.group;
            } else if (action.token !== null && action.token !== undefined) {

                // do $n replacements?
                if (action.tokenSubst) {
                    result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
                } else {
                    result = action.token;
                }

                // state transformations
                if (action.goBack) { // back up the stream..
                    pos = Math.max(0, pos - action.goBack);
                }

                if (action.switchTo && typeof action.switchTo === 'string') {
                    let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
                    if (nextState[0] === '@') {
                        nextState = nextState.substr(1); // peel off starting '@'
                    }

                    if (!monarchCommon.findRules(this._lexer, nextState)) {
                        throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
                    } else {
                        // queue pop/push markers when the scope part of the state changes
                        let from = stack.scope;
                        let to = statePart(nextState, 2);
                        if (from !== to) this._rescope(from, to, append, nextState);
                        stack = stack.switchTo(nextState);
                    }
                } else if (action.transform && typeof action.transform === 'function') {
                    throw monarchCommon.createError(this._lexer, 'action.transform not supported');
                } else if (action.next) {
                    if (action.next === '@push') {
                        if (stack.depth >= this._lexer.maxStack) {
                            throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
                                stack.state + ',' + stack.parent!.state + ',...]');
                        } else {
                            stack = stack.push(state);
                        }
                    } else if (action.next === '@pop') {
                        if (stack.depth <= 1) {
                            throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
                        } else {
                            let prev = stack;
                            stack = stack.pop()!;
                            // queue pop/push markers when the scope part of the state changes
                            let from = statePart(prev.state, 2);
                            let to = statePart(stack.state, 2);
                            if (from !== to) this._rescope(from, to, append, stack.state);
                        }
                    } else if (action.next === '@popall') {
                        stack = stack.popall();
                    } else {
                        let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);

                        if (nextState[0] === '@') {
                            nextState = nextState.substr(1); // peel off starting '@'
                        }

                        let nextScope = statePart(nextState, 2);

                        if (!monarchCommon.findRules(this._lexer, nextState)) {
                            throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
                        } else {
                            // queue pop/push markers when the scope part of the state changes
                            if (nextScope !== stack.scope) this._rescope(stack.scope || '', nextScope, append, nextState);
                            stack = stack.push(nextState);
                        }
                    }
                }

                if (action.log && typeof (action.log) === 'string') {
                    monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
                }

                if (action.mark) {
                    tokensCollector.emit(pos0 + offsetDelta, action.mark, stack);
                }
            }

            // check result
            if (result === null) {
                throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
            }

            // is the result a group match?
            if (Array.isArray(result)) {
                if (groupMatching && groupMatching.groups.length > 0) {
                    throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
                }
                if (matches.length !== result.length + 1) {
                    throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
                }
                let totalLen = 0;
                for (let i = 1; i < matches.length; i++) {
                    totalLen += matches[i].length;
                }
                if (totalLen !== matched.length) {
                    throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
                }

                groupMatching = {
                    rule: rule,
                    matches: matches,
                    groups: []
                };
                for (let i = 0; i < result.length; i++) {
                    groupMatching.groups[i] = {
                        action: result[i],
                        matched: matches[i + 1]
                    };
                }

                // rewind: the individual groups are consumed one per iteration
                pos -= matched.length;
                // call recursively to initiate first result match
                continue;
            } else {
                // regular result

                // check for '@rematch'
                if (result === '@rematch') {
                    pos -= matched.length;
                    matched = ''; // better set the next state too..
                    matches = null;
                    result = '';
                }

                // check progress
                if (matched.length === 0) {
                    // an empty match is only acceptable if something else changed
                    // (empty line, stack depth, state, or pending groups)
                    if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
                        if (typeof result === 'string' && result) tokensCollector.emit(pos + offsetDelta, result, stack);
                        while (append.length > 0) { tokensCollector.emit(pos + offsetDelta, append.shift() as string, stack); }
                        continue;
                    } else {
                        throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
                    }
                }

                // return the result (and check for brace matching)
                // todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
                let tokenType: string | null = null;
                if (monarchCommon.isString(result) && result.indexOf('@brackets') === 0) {
                    let rest = result.substr('@brackets'.length);
                    let bracket = findBracket(this._lexer, matched);
                    if (!bracket) {
                        throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
                    }
                    tokenType = monarchCommon.sanitize(bracket.token + rest);
                } else {
                    let token = (result === '' ? '' : result + this._lexer.tokenPostfix);
                    tokenType = monarchCommon.sanitize(token);
                }

                let token = tokensCollector.emit(pos0 + offsetDelta, tokenType, stack);
                token.stack = stack;

                // the previous token's text runs up to where this token starts
                if (lastToken && lastToken !== token) {
                    lastToken.value = line.slice(lastToken.offset - offsetDelta, pos0);
                }
                lastToken = token;

                while (append.length > 0) { tokensCollector.emit(pos + offsetDelta, append.shift() as string, stack); }
            }
        }

        // the final token's text runs to the end of the line
        if (lastToken && !lastToken.value) {
            lastToken.value = line.slice(lastToken.offset - offsetDelta);
        }

        return MonarchLineStateFactory.create(stack);
    }
}
656
/**
 * Looks up `matched` among the lexer's `brackets` entries.
 *
 * The text is passed through `monarchCommon.fixCase` first. Returns the
 * bracket's token together with whether the text was its opening or its
 * closing form, or `null` when `matched` is empty or no entry matches.
 */
function findBracket(lexer: monarchCommon.ILexer, matched: string) {
    if (!matched) {
        return null;
    }

    const needle = monarchCommon.fixCase(lexer, matched);
    const candidates = lexer.brackets;

    for (const entry of candidates) {
        if (entry.open === needle) {
            return { token: entry.token, bracketType: monarchCommon.MonarchBracket.Open };
        }
        if (entry.close === needle) {
            return { token: entry.token, bracketType: monarchCommon.MonarchBracket.Close };
        }
    }

    return null;
}
677
/**
 * Builds the tokenization support for `modeId` from a lexer description
 * by wrapping it in a `MonarchTokenizer`.
 */
export function createTokenizationSupport(modeId: string, lexer: monarchCommon.ILexer): ITokenizationSupport {
    const support: ITokenizationSupport = new MonarchTokenizer(modeId, lexer);
    return support;
}