1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
/**
 * Numeric bracket classification, used as the type of `IAction.bracket`.
 *
 * Members are listed in ascending numeric order; the values themselves are
 * part of the contract (`Close` is -1, `None` is 0, `Open` is 1).
 */
export const enum MonarchBracket {
  Close = -1,
  None = 0,
  Open = 1
}
|
22 |
|
/**
 * Minimal lexer description consumed by the helper functions of this module.
 *
 * Besides the named fields, any further attribute may be stored under a
 * string key (see the index signature at the bottom).
 */
export interface ILexerMin {
  /** Identifier used as the prefix of log and error messages. */
  languageId: string;
  noThrow: boolean;
  /** When true, `fixCase` lower-cases the strings passed through it. */
  ignoreCase: boolean;
  usesEmbedded: boolean;
  defaultToken: string;
  /** Lookup table probed by `stateExists`; a truthy entry means the state is known. */
  stateNames: Record<string, any>;
  /** Arbitrary extra attributes addressed by name. */
  [attr: string]: any;
}
|
32 |
|
/**
 * Full lexer description: everything in `ILexerMin` plus the rule tables.
 *
 * `ignoreCase` is inherited from `ILexerMin` and therefore not declared again
 * here; the resulting type is unchanged.
 */
export interface ILexer extends ILexerMin {
  maxStack: number;
  /** Name of the start state, or `null` when none is set. */
  start: string | null;
  tokenPostfix: string;

  /** Rule lists indexed by state name; this is the table `findRules` searches. */
  tokenizer: { [stateName: string]: IRule[]; };
  brackets: IBracket[];
}
|
42 |
|
/**
 * One bracket definition: an opening string, a closing string and the token
 * string associated with the pair.
 */
export interface IBracket {
  open: string;
  close: string;
  token: string;
}
|
48 |
|
/** An action given either as a plain string or as a structured `IAction`. */
export type FuzzyAction = string | IAction;
|
50 |
|
/**
 * Type guard: tells whether `what` is a list of actions rather than a single one.
 *
 * @param what A single action or an array of actions.
 * @returns `true` exactly when `what` is an array.
 */
export function isFuzzyActionArr(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction[] {
  const isList = Array.isArray(what);
  return isList;
}
|
54 |
|
/**
 * Type guard: tells whether `what` is a single action rather than a list.
 *
 * @param what A single action or an array of actions.
 * @returns `true` exactly when `what` is not an array.
 */
export function isFuzzyAction(what: FuzzyAction | FuzzyAction[]): what is FuzzyAction {
  if (Array.isArray(what)) {
    return false;
  }
  return true;
}
|
58 |
|
/**
 * Type guard: tells whether an action is given in its plain-string form.
 *
 * @param what The action to inspect.
 * @returns `true` when `typeof what` is `'string'`.
 */
export function isString(what: FuzzyAction): what is string {
  const kind = typeof what;
  return kind === 'string';
}
|
62 |
|
/**
 * Type guard: tells whether an action is given in its structured (`IAction`) form.
 *
 * @param what The action to inspect.
 * @returns `true` when `what` is anything other than a string.
 */
export function isIAction(what: FuzzyAction): what is IAction {
  return typeof what !== 'string';
}
|
66 |
|
/**
 * A single tokenizer rule: a regular expression paired with the action
 * attached to it.
 */
export interface IRule {
  name: string;
  regex: RegExp;
  action: FuzzyAction;
  // NOTE(review): presumably restricts the rule to matches at the start of a line — confirm in the tokenizer.
  matchOnlyAtLineStart: boolean;
  /** Optional free-form data; its consumer is not visible in this module. */
  stats?: any;
  /** Optional string; its consumer is not visible in this module. */
  string?: string;
}
|
75 |
|
/**
 * Structured form of an action. Every field is optional; which combination is
 * meaningful is decided by the code that executes actions, which is not part
 * of this module.
 */
export interface IAction {
  /** A list of nested actions. */
  // NOTE(review): presumably one entry per capture group of the rule's regex — confirm.
  group?: FuzzyAction[];

  /** Callback that computes an action from the id, the matches, the state and the `eos` flag. */
  test?: (id: string, matches: string[], state: string, eos: boolean) => FuzzyAction;

  // Declarative fields.
  token?: string;
  tokenSubst?: boolean;
  next?: string;
  nextEmbedded?: string;
  bracket?: MonarchBracket;
  log?: string;
  switchTo?: string;
  goBack?: number;
  /** Maps one list of state names to another. */
  transform?: (states: string[]) => string[];
  mark?: string;
  _push?: string;
  _pop?: string;
  fn?: Function;
}
|
98 |
|
/**
 * A named alternative: the action stored in `value`, optionally accompanied
 * by a predicate over the id, the matches, the state and the `eos` flag.
 */
export interface IBranch {
  name: string;
  value: FuzzyAction;
  // NOTE(review): presumably the branch applies when this returns true — confirm where branches are evaluated.
  test?: (id: string, matches: string[], state: string, eos: boolean) => boolean;
}
|
104 |
|
105 |
|
106 |
|
107 |
|
108 |
|
109 |
|
/**
 * Tells whether a string is "empty", meaning falsy.
 *
 * @param s The value to test.
 * @returns `true` for the empty string (and for any other falsy value that
 *          reaches the function at runtime), `false` otherwise.
 */
export function empty(s: string): boolean {
  return !s;
}
|
113 |
|
114 |
|
115 |
|
116 |
|
/**
 * Lower-cases `str` when the lexer is case-insensitive.
 *
 * @param lexer Lexer whose `ignoreCase` flag decides whether folding happens.
 * @param str   The string to normalise.
 * @returns `str.toLowerCase()` when `lexer.ignoreCase` is truthy and `str` is
 *          non-empty; otherwise `str` unchanged.
 */
export function fixCase(lexer: ILexerMin, str: string): string {
  if (lexer.ignoreCase && str) {
    return str.toLowerCase();
  }
  return str;
}
|
120 |
|
121 |
|
122 |
|
123 |
|
/**
 * Replaces every occurrence of `&`, `<`, `>`, `'`, `"` and `_` with `-`.
 *
 * @param s The string to clean up.
 * @returns A copy of `s` with each of those characters turned into a dash.
 */
export function sanitize(s: string) {
  const replaced = /[&<>'"_]/g;
  return s.replace(replaced, '-');
}
|
127 |
|
128 |
|
129 |
|
130 |
|
131 |
|
132 |
|
/**
 * Writes a message to the console, prefixed with the lexer's language id.
 *
 * @param lexer Lexer whose `languageId` is used as the prefix.
 * @param msg   The text to print.
 */
export function log(lexer: ILexerMin, msg: string) {
  const line = `${lexer.languageId}: ${msg}`;
  console.log(line);
}
|
136 |
|
137 |
|
138 |
|
/**
 * Builds (but does not throw) an `Error` whose message is prefixed with the
 * lexer's language id.
 *
 * @param lexer Lexer whose `languageId` is used as the prefix.
 * @param msg   The description of the problem.
 * @returns A new `Error` with the message `<languageId>: <msg>`.
 */
export function createError(lexer: ILexerMin, msg: string): Error {
  const message = `${lexer.languageId}: ${msg}`;
  return new Error(message);
}
|
142 |
|
143 |
|
144 |
|
/**
 * Compiled substitution templates, keyed by the raw template string.
 *
 * The object is created without a prototype so that a template such as
 * `constructor` or `toString` can never resolve to an inherited
 * `Object.prototype` member instead of a compiled part list.
 */
const substitutionCache: { [key: string]: any } = Object.create(null);

/** Tells whether `ch` is a single ASCII digit; `undefined` (end of input) is not. */
function isSubstitutionDigit(ch: string | undefined): boolean {
  return ch !== undefined && ch >= '0' && ch <= '9';
}

/**
 * Compiles a substitution template into a list of parts and stores the result
 * in `substitutionCache`.
 *
 * Each part is one of:
 * - a `string`: literal text, copied verbatim;
 * - `0`: the `id` argument of `substituteMatches` (written `$#`);
 * - `n + 1` for a digit `n`: `matches[n]` (written `$n`, one digit only);
 * - `100`: the complete state string (written `$S0` or `$s0`);
 * - `100 + n` for a digit `n >= 1`: the n-th dot-separated segment of the
 *   state (written `$Sn` or `$sn`);
 * - `{ attr: name }`: the string attribute `name` of the lexer (written `$@name`).
 *
 * `$$` yields a literal `$`. Every other `$` sequence, including a `$` at the
 * very end of the template, is kept as literal text.
 *
 * @param str The template to compile.
 * @returns The list of parts (also cached under `str`).
 */
export function compileSubstitution(str: string): any[] {
  const parts: any[] = [];
  const length = str.length;
  let literal = '';
  let i = 0;

  // Emits the literal text collected so far, if any, before a placeholder.
  const flushLiteral = (): void => {
    if (literal) {
      parts.push(literal);
      literal = '';
    }
  };

  while (i < length) {
    const chr = str[i++];
    if (chr !== '$') {
      literal += chr;
      continue;
    }

    // `next` is undefined when the `$` is the last character of the template.
    const next: string | undefined = str[i];

    if (next === '$') {
      literal += '$';
      i += 1;
    } else if (next === '#') {
      flushLiteral();
      parts.push(0);
      i += 1;
    } else if (isSubstitutionDigit(next)) {
      flushLiteral();
      parts.push(parseInt(next, 10) + 1);
      i += 1;
    } else if ((next === 'S' || next === 's') && isSubstitutionDigit(str[i + 1])) {
      flushLiteral();
      parts.push(parseInt(str[i + 1], 10) + 100);
      i += 2;
    } else {
      const attrName = next === '@' ? /^\w+/.exec(str.slice(i + 1)) : null;
      if (attrName) {
        flushLiteral();
        parts.push({ attr: attrName[0] });
        i += 1 + attrName[0].length;
      } else {
        // Not a placeholder: keep the `$` and let the following characters
        // be processed as ordinary text.
        literal += '$';
      }
    }
  }

  flushLiteral();
  substitutionCache[str] = parts;
  return parts;
}

/**
 * Expands the placeholders of `str` (see `compileSubstitution` for the
 * syntax), compiling the template on first use and reusing the cached parts
 * afterwards.
 *
 * A placeholder that has no value (a match index outside `matches`, a capture
 * group that did not participate, a missing state segment, a lexer attribute
 * that is not a string) expands to the empty string.
 *
 * NOTE(review): unlike `substituteMatchesOld`, substituted values are not
 * passed through `fixCase`; `lexer` is only consulted for `$@name`. Confirm
 * this is intended for case-insensitive lexers.
 *
 * @param lexer   Lexer providing the attributes addressed by `$@name`.
 * @param str     The template.
 * @param id      Replacement for `$#`.
 * @param matches Replacement source for `$n`.
 * @param state   Replacement source for `$S0` (whole) and `$Sn` (n-th segment).
 * @returns The expanded string.
 */
export function substituteMatches(lexer: ILexerMin, str: string, id: string, matches: string[], state: string): string {
  const parts: any[] = substitutionCache[str] || compileSubstitution(str);
  // Split lazily: most templates never reference a state segment.
  let stateSegments: string[] | null = null;
  let out = '';

  for (let i = 0; i < parts.length; i++) {
    const part = parts[i];
    if (typeof part === 'string') {
      out += part;
    } else if (typeof part === 'object' && part !== null) {
      const value = lexer ? lexer[part.attr] : undefined;
      if (typeof value === 'string') {
        out += value;
      }
    } else if (part > 100) {
      if (stateSegments === null) {
        stateSegments = state.split('.');
      }
      out += (stateSegments[part - 101] || '');
    } else if (part === 100) {
      out += state;
    } else if (part === 0) {
      out += id;
    } else if (part > 0) {
      // `|| ''` keeps a missing group from being rendered as "undefined".
      out += (matches[part - 1] || '');
    }
  }

  return out;
}
|
218 |
|
/**
 * Regex-based expansion of substitution placeholders in `str`:
 *
 * - `$$`            a literal `$`;
 * - `$#`            `id`;
 * - `$n` / `$nn`    `matches[n]`;
 * - `$Sn` / `$sn`   the state: `n = 0` is the whole state string, `n >= 1` the
 *                   n-th dot-separated segment;
 * - `$@name`        the lexer attribute `name`, when it is a string.
 *
 * `id`, matches and state segments are passed through `fixCase`. A placeholder
 * without a value (index out of range, capture group that did not
 * participate, attribute that is not a string) expands to the empty string.
 * Indices are parsed as decimal numbers, so `$01` addresses `matches[1]`.
 *
 * @param lexer   Lexer used for `fixCase` and for `$@name` lookups.
 * @param str     The template.
 * @param id      Replacement for `$#`.
 * @param matches Replacement source for `$n`.
 * @param state   Replacement source for `$Sn`.
 * @returns The expanded string.
 */
export function substituteMatchesOld(lexer: ILexerMin, str: string, id: string, matches: string[], state: string): string {
  const re = /\$((\$)|(#)|(\d\d?)|[sS](\d\d?)|@(\w+))/g;
  // Built on first use: index 0 is the whole state, followed by its segments.
  let stateMatches: string[] | null = null;

  return str.replace(re, function (_full: string, _sub: string, dollar?: string, hash?: string, n?: string, s?: string, attr?: string): string {
    if (dollar) {
      return '$';
    }
    if (hash) {
      return fixCase(lexer, id);
    }
    if (n) {
      const matchIndex = parseInt(n, 10);
      if (matchIndex < matches.length) {
        // A group that did not participate is undefined; render it as ''.
        return fixCase(lexer, matches[matchIndex]) || '';
      }
    }
    if (attr && lexer && typeof (lexer[attr]) === 'string') {
      return lexer[attr];
    }
    if (stateMatches === null) {
      stateMatches = state.split('.');
      stateMatches.unshift(state);
    }
    if (s) {
      const stateIndex = parseInt(s, 10);
      if (stateIndex < stateMatches.length) {
        return fixCase(lexer, stateMatches[stateIndex]);
      }
    }
    return '';
  });
}
|
246 |
|
/**
 * Memo for `findRules`: for each lexer, maps a requested state name to the
 * tokenizer key it resolved to.
 *
 * The memo is kept per lexer because two lexers may resolve the same state
 * name to different keys. A `WeakMap` is used so that an entry does not keep
 * its lexer alive.
 */
const FIND_RULES_MAP = new WeakMap<ILexer, Map<string, string>>();

/**
 * Looks up the rule list for `inState` in `lexer.tokenizer`.
 *
 * The full state name is tried first. If it has no rules, the last
 * dot-separated segment is dropped and the lookup is repeated (`a.b.c`, then
 * `a.b`, then `a`). Successful resolutions are remembered per lexer.
 *
 * @param lexer   Lexer whose `tokenizer` table is searched.
 * @param inState State name, possibly with dot-separated suffixes.
 * @returns The rules of the most specific matching state, or `null` when no
 *          prefix of `inState` has rules.
 */
export function findRules(lexer: ILexer, inState: string): IRule[] | null {
  let resolved = FIND_RULES_MAP.get(lexer);
  if (resolved === undefined) {
    resolved = new Map<string, string>();
    FIND_RULES_MAP.set(lexer, resolved);
  }

  const cachedName = resolved.get(inState);
  if (cachedName !== undefined) {
    const cachedRules = lexer.tokenizer[cachedName];
    if (cachedRules) {
      return cachedRules;
    }
    // The remembered key no longer has rules; resolve again below.
  }

  let state: string | null = inState;
  while (state) {
    const rules = lexer.tokenizer[state];
    if (rules) {
      resolved.set(inState, state);
      return rules;
    }

    const idx = state.lastIndexOf('.');
    state = idx < 0 ? null : state.slice(0, idx);
  }
  return null;
}
|
273 |
|
274 |
|
275 |
|
276 |
|
277 |
|
278 |
|
/**
 * Tells whether `inState`, or any prefix obtained by repeatedly dropping its
 * last dot-separated segment, has a truthy entry in `lexer.stateNames`.
 *
 * @param lexer   Lexer whose `stateNames` table is consulted.
 * @param inState State name, possibly with dot-separated suffixes.
 * @returns `true` as soon as one candidate is found, `false` when none is.
 */
export function stateExists(lexer: ILexerMin, inState: string): boolean {
  for (let candidate: string | null = inState; candidate; ) {
    if (lexer.stateNames[candidate]) {
      return true;
    }
    // Strip the last `.segment`; stop once no dot is left.
    const lastDot = candidate.lastIndexOf('.');
    candidate = lastDot < 0 ? null : candidate.slice(0, lastDot);
  }
  return false;
}
|