UNPKG

7.29 kBPlain TextView Raw
1/*---------------------------------------------------------------------------------------------
2 * Copyright (c) Microsoft Corporation. All rights reserved.
3 * Licensed under the MIT License. See License.txt in the project root for license information.
4 *--------------------------------------------------------------------------------------------*/
5
6/*
7 * This module exports common types and functionality shared between
8 * the Monarch compiler that compiles JSON to ILexer, and the Monarch
9 * Tokenizer (that highlights at runtime)
10 */
11
12/*
13 * Type definitions to be used internally to Monarch.
14 * Inside monarch we use fully typed definitions and compiled versions of the more abstract JSON descriptions.
15 */
16
/**
 * Bracket classification that an action can attach to a token
 * (see the `bracket` attribute of `IAction`).
 */
export const enum MonarchBracket {
	/** Closing bracket. */
	Close = -1,
	/** Not a bracket. */
	None = 0,
	/** Opening bracket. */
	Open = 1
}
22
/**
 * The minimal lexer shape that the helper functions in this module operate on.
 */
export interface ILexerMin {
	/** Language identifier; used as the prefix of log and error messages. */
	languageId: string;
	noThrow: boolean;
	/** When set, `fixCase` lower-cases the strings it is given. */
	ignoreCase: boolean;
	usesEmbedded: boolean;
	defaultToken: string;
	/** Table of known state names, consulted by `stateExists`. */
	stateNames: Record<string, any>;
	/** Any further attribute can be looked up by name. */
	[attr: string]: any;
}
32
/**
 * A full lexer: everything in `ILexerMin` (including `ignoreCase`, which is
 * inherited) plus the tokenizer rules and bracket definitions.
 */
export interface ILexer extends ILexerMin {
	maxStack: number;
	start: string | null;
	tokenPostfix: string;

	/** Rules per state name; `findRules` looks states up in this table. */
	tokenizer: Record<string, IRule[]>;
	brackets: IBracket[];
}
42
/**
 * One bracket definition: the opening text, the closing text and the token
 * associated with the pair.
 */
export interface IBracket {
	open: string;
	close: string;
	token: string;
}
48
/** An action given either as a plain string or as a full `IAction` object. */
export type FuzzyAction = string | IAction;
50
/**
 * Type guard: true when `what` is an array of actions rather than a single action.
 */
export function isFuzzyActionArr(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction[] {
	const isList = Array.isArray(what);
	return isList;
}
54
/**
 * Type guard: true when `what` is a single action, i.e. anything that is not an array.
 */
export function isFuzzyAction(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction {
	if (Array.isArray(what)) {
		return false;
	}
	return true;
}
58
/**
 * Type guard: true when the action is given as a plain string.
 */
export function isString(what: FuzzyAction): what is string {
	const kind = typeof what;
	return kind === 'string';
}
62
/**
 * Type guard: true when the action is an `IAction` object, i.e. anything that is not a string.
 */
export function isIAction(what: FuzzyAction): what is IAction {
	return typeof what !== 'string';
}
66
/**
 * A single tokenizer rule: the regular expression to match and the action
 * associated with it.
 */
export interface IRule {
	name: string;
	regex: RegExp;
	action: FuzzyAction;
	matchOnlyAtLineStart: boolean;

	// Optional extras.
	stats?: any;
	string?: string;
}
75
/**
 * An action takes one of three forms: a group of actions, a function that
 * returns a fresh action, or a declarative description made of a token value
 * and various other attributes. Every member is optional.
 */
export interface IAction {
	/** Group form: a list of actions. */
	group?: FuzzyAction[];

	/** Function form: returns a fresh action. */
	test?: (id: string, matches: string[], state: string, eos: boolean) => FuzzyAction;

	// Declarative form: a token value and its accompanying attributes.
	token?: string;
	tokenSubst?: boolean;
	next?: string;
	nextEmbedded?: string;
	bracket?: MonarchBracket;
	log?: string;
	switchTo?: string;
	goBack?: number;
	transform?: (states: string[]) => string[];
	mark?: string;
	_push?: string;
	_pop?: string;
	fn?: Function;
}
98
/**
 * A named branch: the action it yields (`value`) and an optional predicate
 * (`test`) over the id, the matches, the state and the end-of-stream flag.
 */
export interface IBranch {
	name: string;
	test?: (id: string, matches: string[], state: string, eos: boolean) => boolean;
	value: FuzzyAction;
}
104
105// Small helper functions
106
/**
 * Reports whether a string is null, undefined, or has no characters.
 */
export function empty(s: string): boolean {
	return !s;
}
113
/**
 * Lower-cases `str` when the lexer has 'ignoreCase' set; otherwise (or when
 * `str` is empty) returns it unchanged.
 */
export function fixCase(lexer: ILexerMin, str: string): string {
	if (lexer.ignoreCase && str) {
		return str.toLowerCase();
	}
	return str;
}
120
/**
 * Replaces every character that is not allowed in a CSS token class
 * (& < > ' " _) with a dash. Used on all output token CSS classes.
 */
export function sanitize(s: string) {
	const disallowed = /[&<>'"_]/g;
	return s.replace(disallowed, '-');
}
127
128// Logging
129
/**
 * Writes a message to the console, prefixed with the lexer's language id.
 */
export function log(lexer: ILexerMin, msg: string) {
	const line = `${lexer.languageId}: ${msg}`;
	console.log(line);
}
136
137// Throwing errors
138
/**
 * Builds (but does not throw) an Error whose message is prefixed with the
 * lexer's language id.
 */
export function createError(lexer: ILexerMin, msg: string): Error {
	const message = `${lexer.languageId}: ${msg}`;
	return new Error(message);
}
142
143// Helper functions for rule finding and substitution
144
/**
 * Compiled substitution templates, keyed by the template string.
 * The object has no prototype so that templates such as 'constructor' or
 * 'toString' are never mistaken for cached entries.
 */
const substitutionCache: { [key: string]: any } = Object.create(null);

/** Matches the attribute name that directly follows '$@'. */
const SUBSTITUTION_ATTR_NAME = /^\w+/;

/** Is `chr` a single decimal digit? */
function isSubstitutionDigit(chr: string | undefined): boolean {
	return chr !== undefined && chr >= '0' && chr <= '9';
}

/**
 * Compiles a substitution template into a list of parts and stores the
 * result in the module-level cache.
 *
 * Each part is one of:
 *  - a string: literal text (a '$$' in the template contributes a single '$')
 *  - 0: the id ('$#')
 *  - n + 1: matched entry n ('$n', a single digit)
 *  - 100: the whole state ('$S0')
 *  - 100 + n: the n-th dot-separated part of the state ('$Sn', a single digit)
 *  - { attr }: a lexer attribute ('$@attr')
 *
 * '$s' is accepted as an alternative spelling of '$S'. A '$' that does not
 * start one of these patterns (including a '$' at the very end of the
 * template) is kept as literal text.
 *
 * @param str The template to compile.
 * @returns The compiled parts, in template order.
 */
export function compileSubstitution(str: string): any[] {
	const parts: any[] = [];
	const length = str.length;
	let literal = '';
	let i = 0;

	// Emits the literal text collected so far, if any.
	const flushLiteral = () => {
		if (literal) {
			parts.push(literal);
			literal = '';
		}
	};

	while (i < length) {
		const chr = str[i++];
		if (chr !== '$' || i >= length) {
			// Ordinary character, or a '$' with nothing after it.
			literal += chr;
			continue;
		}

		const next = str[i];
		if (next === '$') {
			literal += '$';
			i++;
		} else if (next === '#') {
			flushLiteral();
			parts.push(0);
			i++;
		} else if (isSubstitutionDigit(next)) {
			flushLiteral();
			parts.push(Number(next) + 1);
			i++;
		} else if ((next === 'S' || next === 's') && isSubstitutionDigit(str[i + 1])) {
			flushLiteral();
			parts.push(Number(str[i + 1]) + 100);
			i += 2;
		} else {
			const attrName = next === '@' ? SUBSTITUTION_ATTR_NAME.exec(str.slice(i + 1)) : null;
			if (attrName) {
				flushLiteral();
				parts.push({ attr: attrName[0] });
				i += 1 + attrName[0].length;
			} else {
				// Not a substitution pattern: keep the '$' and let the
				// following characters be handled as ordinary text.
				literal += '$';
			}
		}
	}

	flushLiteral();
	substitutionCache[str] = parts;
	return parts;
}

/**
 * substituteMatches is used on lexer strings and substitutes predefined patterns:
 * 		$$  => $
 * 		$#  => id
 * 		$n  => matched entry n
 * 		$Sn => n-th part of the state ($S0 is the whole state)
 * 		$@attr => contents of lexer[attr], when that is a string
 *
 * Substituted ids, matches and state parts are lower-cased when the lexer has
 * 'ignoreCase' set, as `substituteMatchesOld` does. A pattern that refers to a
 * missing match, a missing state part or a non-string attribute contributes
 * nothing to the result.
 *
 * @param lexer The lexer whose 'ignoreCase' flag and attributes are consulted.
 * @param str The template to expand.
 * @param id The value substituted for '$#'.
 * @param matches The values substituted for '$n'.
 * @param state The state name; '$Sn' refers to its dot-separated parts.
 * @returns The expanded string.
 */
export function substituteMatches(lexer: ILexerMin, str: string, id: string, matches: string[], state: string): string {
	const parts: any[] = substitutionCache[str] || compileSubstitution(str);
	const lowerCase = !!(lexer && lexer.ignoreCase);
	// Missing values become '' instead of the text 'undefined'.
	const fix = (value: string | undefined): string => {
		if (!value) {
			return '';
		}
		return lowerCase ? value.toLowerCase() : value;
	};

	let stateMatches: string[] | null = null;
	let out = '';

	for (let i = 0; i < parts.length; i++) {
		const part = parts[i];
		if (typeof part === 'string') {
			out += part;
		} else if (typeof part === 'object') {
			const attrValue = lexer ? lexer[part.attr] : undefined;
			if (typeof attrValue === 'string') {
				out += attrValue;
			}
		} else if (part > 100) {
			if (stateMatches === null) {
				stateMatches = state.split('.'); // split state on demand
			}
			out += fix(stateMatches[part - 101]);
		} else if (part === 100) {
			out += fix(state);
		} else if (part === 0) {
			out += fix(id);
		} else if (part > 0) {
			out += fix(matches[part - 1]);
		}
	}
	return out;
}
218
/**
 * Regex-based variant of `substituteMatches`. Replaces, in `str`:
 * 		$$  => $
 * 		$#  => id
 * 		$n  => matched entry n (one or two digits)
 * 		$Sn / $sn => n-th part of the state ($S0 is the whole state)
 * 		$@attr => contents of lexer[attr], when that is a string
 * Ids, matches and state parts pass through `fixCase`. A pattern that cannot
 * be resolved is replaced by the empty string.
 */
export function substituteMatchesOld(lexer: ILexerMin, str: string, id: string, matches: string[], state: string): string {
	let stateParts: string[] | null = null;
	const pattern = /\$((\$)|(#)|(\d\d?)|[sS](\d\d?)|@(\w+))/g;

	return str.replace(pattern, (_full, _sub?, dollar?, hash?, n?, s?, attr?) => {
		// $$
		if (!empty(dollar)) {
			return '$';
		}
		// $#
		if (!empty(hash)) {
			return fixCase(lexer, id);
		}
		// $n
		if (!empty(n) && n < matches.length) {
			return fixCase(lexer, matches[n]);
		}
		// $@attribute
		if (!empty(attr) && lexer && typeof (lexer[attr]) === 'string') {
			return lexer[attr];
		}
		// The state is split only once, the first time it is needed;
		// index 0 holds the whole state.
		if (stateParts === null) {
			stateParts = state.split('.');
			stateParts.unshift(state);
		}
		// $Sn
		if (!empty(s) && s < stateParts.length) {
			return fixCase(lexer, stateParts[s]);
		}
		return '';
	});
}
246
/**
 * Per-lexer cache that maps a requested state name to the state name whose
 * rules were found for it. It is keyed by lexer because the same state name
 * can resolve differently in different lexers.
 */
const FIND_RULES_MAP = new WeakMap<ILexer, Map<string, string>>();

/**
 * Find the tokenizer rules for a specific state (i.e. next action).
 *
 * The state itself is tried first; if the tokenizer has no rules for it, the
 * last dot-separated segment is removed and the lookup is repeated, until
 * rules are found or no segment is left.
 *
 * @param lexer The lexer whose tokenizer is searched.
 * @param inState The state name, possibly with dot-separated sub-states.
 * @returns The rules of the closest defined state, or null when there is none.
 */
export function findRules(lexer: ILexer, inState: string): IRule[] | null {
	let resolved = FIND_RULES_MAP.get(lexer);
	if (!resolved) {
		resolved = new Map<string, string>();
		FIND_RULES_MAP.set(lexer, resolved);
	}

	const cachedState = resolved.get(inState);
	if (cachedState !== undefined) {
		const cachedRules = lexer.tokenizer[cachedState];
		if (cachedRules) {
			return cachedRules;
		}
		// The cached entry no longer leads to rules: search again below.
	}

	let state: string | null = inState;
	while (state && state.length > 0) {
		const rules = lexer.tokenizer[state];
		if (rules) {
			resolved.set(inState, state);
			return rules;
		}

		const idx = state.lastIndexOf('.');
		if (idx < 0) {
			state = null; // no further parent
		} else {
			state = state.slice(0, idx);
		}
	}
	return null;
}
273
/**
 * Is a certain state defined? In contrast to 'findRules' this works on an ILexerMin.
 * This is used during compilation where we may know the defined states
 * but not yet whether the corresponding rules are correct.
 *
 * The state name is checked against `lexer.stateNames`; when it is not found
 * there, its last dot-separated segment is dropped and the check is repeated.
 */
export function stateExists(lexer: ILexerMin, inState: string): boolean {
	let candidate = inState;
	while (candidate) {
		if (lexer.stateNames[candidate]) {
			return true;
		}

		const lastDot = candidate.lastIndexOf('.');
		if (lastDot < 0) {
			break; // no further parent
		}
		candidate = candidate.substring(0, lastDot);
	}
	return false;
}