/**
 * Prism: Lightweight, robust, elegant syntax highlighting
 *
 * @license MIT <https://opensource.org/licenses/MIT>
 * @author Lea Verou <https://lea.verou.me>
 * @namespace
 * @public
 */
/**
 * prism-react-renderer:
 * This file has been modified to remove:
 * - globals and window dependency
 * - worker support
 * - highlightAll and other element dependent methods
 * - _.hooks helpers
 * - UMD/node-specific hacks
 * It has also been run through prettier
 */

var Prism = (function () {
  // Private helper vars
  var lang = /(?:^|\s)lang(?:uage)?-([\w-]+)(?=\s|$)/i;
  var uniqueId = 0;

  // The grammar object for plaintext
  var plainTextGrammar = {};

  var _ = {
    /**
     * A namespace for utility methods.
     *
     * All functions in this namespace that are not explicitly marked as _public_ are for __internal use only__ and may
     * change or disappear at any time.
     *
     * @namespace
     * @memberof Prism
     */
    util: {
      /**
       * HTML-escapes the textual content of a string, a token, or a token stream.
       *
       * In strings, `&` and `<` are replaced by their HTML entities and non-breaking spaces (U+00A0) are replaced
       * by regular spaces. Tokens and arrays are processed recursively and returned as new objects; the input is
       * not modified. Tokens created here are constructed without a matched string, so their `length` is `0`.
       *
       * @param {string | Token | TokenStream} tokens
       * @returns {string | Token | TokenStream} The escaped copy, of the same shape as the input.
       */
      encode: function encode(tokens) {
        if (tokens instanceof Token) {
          return new Token(tokens.type, encode(tokens.content), tokens.alias);
        } else if (Array.isArray(tokens)) {
          return tokens.map(encode);
        } else {
          // `&` has to be escaped first, otherwise the `&` of `&lt;` would be escaped again
          return tokens
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/\u00a0/g, ' ');
        }
      },

      /**
       * Returns the name of the type of the given value.
       *
       * @param {any} o
       * @returns {string}
       * @example
       * type(null)      === 'Null'
       * type(undefined) === 'Undefined'
       * type(123)       === 'Number'
       * type('foo')     === 'String'
       * type(true)      === 'Boolean'
       * type([1, 2])    === 'Array'
       * type({})        === 'Object'
       * type(String)    === 'Function'
       * type(/abc+/)    === 'RegExp'
       */
      type: function (o) {
        // "[object Xyz]" -> "Xyz"
        return Object.prototype.toString.call(o).slice(8, -1);
      },

      /**
       * Returns a unique number for the given object. Later calls will still return the same number.
       *
       * The number is stored on the object itself in a non-enumerable `__id` property.
       *
       * @param {Object} obj
       * @returns {number}
       */
      objId: function (obj) {
        if (!obj['__id']) {
          Object.defineProperty(obj, '__id', { value: ++uniqueId });
        }
        return obj['__id'];
      },

      /**
       * Creates a deep clone of the given object.
       *
       * The main intended use of this function is to clone language definitions.
       *
       * Only plain objects and arrays are copied; every other value (e.g. regular expressions, functions and
       * primitives) is returned as is. Circular references are supported through the `visited` map.
       *
       * @param {T} o
       * @param {Record<number, any>} [visited] Map from object id to the clone already created for that object.
       * @returns {T}
       * @template T
       */
      clone: function deepClone(o, visited) {
        visited = visited || {};

        var clone;
        var id;
        switch (_.util.type(o)) {
          case 'Object':
            id = _.util.objId(o);
            if (visited[id]) {
              return visited[id];
            }
            clone = /** @type {Record<string, any>} */ ({});
            // register the clone before recursing so cycles resolve to it
            visited[id] = clone;

            for (var key in o) {
              if (o.hasOwnProperty(key)) {
                clone[key] = deepClone(o[key], visited);
              }
            }

            return /** @type {any} */ (clone);

          case 'Array':
            id = _.util.objId(o);
            if (visited[id]) {
              return visited[id];
            }
            clone = [];
            visited[id] = clone;

            /** @type {Array} */ (/** @type {any} */ (o)).forEach(function (v, i) {
              clone[i] = deepClone(v, visited);
            });

            return /** @type {any} */ (clone);

          default:
            return o;
        }
      },

      /**
       * Returns the Prism language of the given element set by a `language-xxxx` or `lang-xxxx` class.
       *
       * The element and then its ancestors are searched. If no language is set for the element or the element
       * is `null` or `undefined`, `none` will be returned.
       *
       * @param {Element} element
       * @returns {string}
       */
      getLanguage: function (element) {
        while (element) {
          var m = lang.exec(element.className);
          if (m) {
            return m[1].toLowerCase();
          }
          element = element.parentElement;
        }
        return 'none';
      },

      /**
       * Sets the Prism `language-xxxx` class of the given element.
       *
       * @param {Element} element
       * @param {string} language
       * @returns {void}
       */
      setLanguage: function (element, language) {
        // remove all `language-xxxx` classes
        // (this might leave behind a leading space)
        element.className = element.className.replace(RegExp(lang, 'gi'), '');

        // add the new `language-xxxx` class
        // (using `classList` will automatically clean up spaces for us)
        element.classList.add('language-' + language);
      },

      /**
       * Returns whether a given class is active for `element`.
       *
       * The class can be activated if `element` or one of its ancestors has the given class and it can be deactivated
       * if `element` or one of its ancestors has the negated version of the given class. The _negated version_ of the
       * given class is just the given class with a `no-` prefix.
       *
       * Whether the class is active is determined by the closest ancestor of `element` (where `element` itself is
       * closest ancestor) that has the given class or the negated version of it. If neither `element` nor any of its
       * ancestors have the given class or the negated version of it, then the default activation will be returned.
       *
       * In the paradoxical situation where the closest ancestor contains __both__ the given class and the negated
       * version of it, the class is considered active.
       *
       * @param {Element} element
       * @param {string} className
       * @param {boolean} [defaultActivation=false]
       * @returns {boolean}
       */
      isActive: function (element, className, defaultActivation) {
        var no = 'no-' + className;

        while (element) {
          var classList = element.classList;
          if (classList.contains(className)) {
            return true;
          }
          if (classList.contains(no)) {
            return false;
          }
          element = element.parentElement;
        }
        return !!defaultActivation;
      }
    },

    /**
     * This namespace contains all currently loaded languages and some helper functions to create and modify languages.
     *
     * @namespace
     * @memberof Prism
     * @public
     */
    languages: {
      /**
       * The grammar for plain, unformatted text.
       */
      plain: plainTextGrammar,
      plaintext: plainTextGrammar,
      text: plainTextGrammar,
      txt: plainTextGrammar,

      /**
       * Creates a deep copy of the language with the given id and appends the given tokens.
       *
       * If a token in `redef` also appears in the copied language, then the existing token in the copied language
       * will be overwritten at its original position.
       *
       * ## Best practices
       *
       * Since the position of overwriting tokens (token in `redef` that overwrite tokens in the copied language)
       * doesn't matter, they can technically be in any order. However, this can be confusing to others that are
       * trying to understand the language definition because, normally, the order of tokens matters in Prism grammars.
       *
       * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
       * Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
       *
       * @param {string} id The id of the language to extend. This has to be a key in `Prism.languages`.
       * @param {Grammar} redef The new tokens to append.
       * @returns {Grammar} The new language created.
       * @public
       * @example
       * Prism.languages['css-with-colors'] = Prism.languages.extend('css', {
       *     // Prism.languages.css already has a 'comment' token, so this token will overwrite CSS' 'comment' token
       *     // at its original position
       *     'comment': { ... },
       *     // CSS doesn't have a 'color' token, so this token will be appended
       *     'color': /\b(?:red|green|blue)\b/
       * });
       */
      extend: function (id, redef) {
        var lang = _.util.clone(_.languages[id]);

        for (var key in redef) {
          lang[key] = redef[key];
        }

        return lang;
      },

      /**
       * Inserts tokens _before_ another token in a language definition or any other grammar.
       *
       * ## Usage
       *
       * This helper method makes it easy to modify existing languages. For example, the CSS language definition
       * not only defines CSS highlighting for CSS documents, but also needs to define highlighting for CSS embedded
       * in HTML through `<style>` elements. To do this, it needs to modify `Prism.languages.markup` and add the
       * appropriate tokens. However, `Prism.languages.markup` is a regular JavaScript object literal, so if you do
       * this:
       *
       * ```js
       * Prism.languages.markup.style = {
       *     // token
       * };
       * ```
       *
       * then the `style` token will be added (and processed) at the end. `insertBefore` allows you to insert tokens
       * before existing tokens. For the CSS example above, you would use it like this:
       *
       * ```js
       * Prism.languages.insertBefore('markup', 'cdata', {
       *     'style': {
       *         // token
       *     }
       * });
       * ```
       *
       * ## Special cases
       *
       * If the grammars of `inside` and `insert` have tokens with the same name, the tokens in `inside`'s grammar
       * will be ignored.
       *
       * This behavior can be used to insert tokens after `before`:
       *
       * ```js
       * Prism.languages.insertBefore('markup', 'comment', {
       *     'comment': Prism.languages.markup.comment,
       *     // tokens after 'comment'
       * });
       * ```
       *
       * ## Limitations
       *
       * The main problem `insertBefore` has to solve is iteration order. Since ES2015, the iteration order for object
       * properties is guaranteed to be the insertion order (except for integer keys) but some browsers behave
       * differently when keys are deleted and re-inserted. So `insertBefore` can't be implemented by temporarily
       * deleting properties which is necessary to insert at arbitrary positions.
       *
       * To solve this problem, `insertBefore` doesn't actually insert the given tokens into the target object.
       * Instead, it will create a new object and replace all references to the target object with the new one. This
       * can be done without temporarily deleting properties, so the iteration order is well-defined.
       *
       * However, only references that can be reached from `Prism.languages` or `insert` will be replaced. I.e. if
       * you hold the target object in a variable, then the value of the variable will not change.
       *
       * ```js
       * var oldMarkup = Prism.languages.markup;
       * var newMarkup = Prism.languages.insertBefore('markup', 'comment', { ... });
       *
       * assert(oldMarkup !== Prism.languages.markup);
       * assert(newMarkup === Prism.languages.markup);
       * ```
       *
       * @param {string} inside The property of `root` (e.g. a language id in `Prism.languages`) that contains the
       * object to be modified.
       * @param {string} before The key to insert before.
       * @param {Grammar} insert An object containing the key-value pairs to be inserted.
       * @param {Object<string, any>} [root] The object containing `inside`, i.e. the object that contains the
       * object to be modified.
       *
       * Defaults to `Prism.languages`.
       * @returns {Grammar} The new grammar object.
       * @public
       */
      insertBefore: function (inside, before, insert, root) {
        root = root || /** @type {any} */ (_.languages);
        var grammar = root[inside];
        /** @type {Grammar} */
        var ret = {};

        for (var token in grammar) {
          if (grammar.hasOwnProperty(token)) {
            if (token == before) {
              for (var newToken in insert) {
                if (insert.hasOwnProperty(newToken)) {
                  ret[newToken] = insert[newToken];
                }
              }
            }

            // Do not insert token which also occur in insert. See #1525
            if (!insert.hasOwnProperty(token)) {
              ret[token] = grammar[token];
            }
          }
        }

        var old = root[inside];
        root[inside] = ret;

        // Update references in other language definitions
        _.languages.DFS(_.languages, function (key, value) {
          if (value === old && key != inside) {
            this[key] = ret;
          }
        });

        return ret;
      },

      /**
       * Traverses a language definition with Depth First Search.
       *
       * For every own enumerable property of `o`, `callback` is called with `this` set to the object containing
       * the property and the arguments `(key, value, type || key)`. Plain objects and arrays that have not been
       * visited yet are then traversed recursively.
       *
       * @param {Object<string, any>} o The object to traverse.
       * @param {(this: any, key: string, value: any, type: string) => void} callback
       * @param {string | null} [type] Passed to `callback` instead of the key if truthy. When recursing into an
       * array, this is the key under which the array is stored.
       * @param {Record<number, boolean>} [visited] Ids (see `util.objId`) of the objects already traversed.
       * @returns {void}
       */
      DFS: function DFS(o, callback, type, visited) {
        visited = visited || {};

        var objId = _.util.objId;

        for (var i in o) {
          if (o.hasOwnProperty(i)) {
            callback.call(o, i, o[i], type || i);

            // re-read the property: the callback may have replaced it
            var property = o[i];
            var propertyType = _.util.type(property);

            if (propertyType === 'Object' && !visited[objId(property)]) {
              visited[objId(property)] = true;
              DFS(property, callback, null, visited);
            } else if (propertyType === 'Array' && !visited[objId(property)]) {
              visited[objId(property)] = true;
              DFS(property, callback, i, visited);
            }
          }
        }
      }
    },

    plugins: {},

    /**
     * Low-level function, only use if you know what you’re doing. It accepts a string of text as input
     * and the language definitions to use, and returns a string with the HTML produced.
     *
     * The following hooks will be run:
     * 1. `before-tokenize`
     * 2. `after-tokenize`
     * 3. `wrap`: On each {@link Token}.
     *
     * @param {string} text A string with the code to be highlighted.
     * @param {Grammar} grammar An object containing the tokens to use.
     *
     * Usually a language definition like `Prism.languages.markup`.
     * @param {string} language The name of the language definition passed to `grammar`.
     * @returns {string} The highlighted HTML.
     * @memberof Prism
     * @public
     * @example
     * Prism.highlight('var foo = true;', Prism.languages.javascript, 'javascript');
     */
    highlight: function (text, grammar, language) {
      var env = {
        code: text,
        grammar: grammar,
        language: language
      };
      _.hooks.run('before-tokenize', env);
      env.tokens = _.tokenize(env.code, env.grammar);
      _.hooks.run('after-tokenize', env);
      // the code is HTML-escaped here, after tokenizing and before the markup is generated
      return Token.stringify(_.util.encode(env.tokens), env.language);
    },

    /**
     * This is the heart of Prism, and the most low-level function you can use. It accepts a string of text as input
     * and the language definitions to use, and returns an array with the tokenized code.
     *
     * When the language definition includes nested tokens, the function is called recursively on each of these tokens.
     *
     * This method could be useful in other contexts as well, as a very crude parser.
     *
     * Note that a `rest` property of `grammar` is merged into `grammar` itself and then deleted, i.e. the given
     * grammar object is modified.
     *
     * @param {string} text A string with the code to be highlighted.
     * @param {Grammar} grammar An object containing the tokens to use.
     *
     * Usually a language definition like `Prism.languages.markup`.
     * @returns {TokenStream} An array of strings and tokens, a token stream.
     * @memberof Prism
     * @public
     * @example
     * let code = `var foo = 0;`;
     * let tokens = Prism.tokenize(code, Prism.languages.javascript);
     * tokens.forEach(token => {
     *     if (token instanceof Prism.Token && token.type === 'number') {
     *         console.log(`Found numeric literal: ${token.content}`);
     *     }
     * });
     */
    tokenize: function (text, grammar) {
      var rest = grammar.rest;
      if (rest) {
        for (var token in rest) {
          grammar[token] = rest[token];
        }

        delete grammar.rest;
      }

      var tokenList = new LinkedList();
      addAfter(tokenList, tokenList.head, text);

      matchGrammar(text, tokenList, grammar, tokenList.head, 0);

      return toArray(tokenList);
    },

    /**
     * @namespace
     * @memberof Prism
     * @public
     */
    hooks: {
      /**
       * Maps the name of each hook to the list of callbacks registered for it (in registration order).
       */
      all: {},

      /**
       * Adds the given callback to the list of callbacks for the given hook.
       *
       * The callback will be invoked when the hook it is registered for is run.
       * Hooks are usually directly run by a highlight function but you can also run hooks yourself.
       *
       * One callback function can be registered to multiple hooks and the same hook multiple times.
       *
       * @param {string} name The name of the hook.
       * @param {HookCallback} callback The callback function which is given environment variables.
       * @public
       */
      add: function (name, callback) {
        var hooks = _.hooks.all;

        hooks[name] = hooks[name] || [];

        hooks[name].push(callback);
      },

      /**
       * Runs a hook invoking all registered callbacks with the given environment variables.
       *
       * Callbacks will be invoked synchronously and in the order in which they were registered.
       *
       * @param {string} name The name of the hook.
       * @param {Object<string, any>} env The environment variables of the hook passed to all callbacks registered.
       * @public
       */
      run: function (name, env) {
        var callbacks = _.hooks.all[name];

        if (!callbacks || !callbacks.length) {
          return;
        }

        for (var i = 0, callback; (callback = callbacks[i++]); ) {
          callback(env);
        }
      }
    },

    Token: Token
  };

  // Typescript note:
  // The following can be used to import the Token type in JSDoc:
  //
  //   @typedef {InstanceType<import("./prism-core")["Token"]>} Token

  /**
   * Creates a new token.
   *
   * @param {string} type See {@link Token#type type}
   * @param {string | TokenStream} content See {@link Token#content content}
   * @param {string|string[]} [alias] The alias(es) of the token.
   * @param {string} [matchedStr=""] A copy of the full string this token was created from.
   * @class
   * @global
   * @public
   */
  function Token(type, content, alias, matchedStr) {
    /**
     * The type of the token.
     *
     * This is usually the key of a pattern in a {@link Grammar}.
     *
     * @type {string}
     * @see GrammarToken
     * @public
     */
    this.type = type;
    /**
     * The strings or tokens contained by this token.
     *
     * This will be a token stream if the pattern matched also defined an `inside` grammar.
     *
     * @type {string | TokenStream}
     * @public
     */
    this.content = content;
    /**
     * The alias(es) of the token.
     *
     * @type {string|string[]}
     * @see GrammarToken
     * @public
     */
    this.alias = alias;
    // Length of the full string this token was created from (0 if no string was given)
    this.length = (matchedStr || '').length | 0;
  }

  /**
   * A token stream is an array of strings and {@link Token Token} objects.
   *
   * Token streams have to fulfill a few properties that are assumed by most functions (mostly internal ones) that process
   * them.
   *
   * 1. No adjacent strings.
   * 2. No empty strings.
   *
   *    The only exception here is the token stream that only contains the empty string and nothing else.
   *
   * @typedef {Array<string | Token>} TokenStream
   * @global
   * @public
   */

  /**
   * Converts the given token or token stream to an HTML representation.
   *
   * Strings are emitted as is, so they have to be HTML-escaped already (see `Prism.util.encode`). Values of
   * attributes added by `wrap` hooks have their double quotes escaped.
   *
   * The following hooks will be run:
   * 1. `wrap`: On each {@link Token}.
   *
   * @param {string | Token | TokenStream} o The token or token stream to be converted.
   * @param {string} language The name of current language.
   * @returns {string} The HTML representation of the token or token stream.
   * @memberof Token
   * @static
   */
  Token.stringify = function stringify(o, language) {
    if (typeof o == 'string') {
      return o;
    }
    if (Array.isArray(o)) {
      var s = '';
      o.forEach(function (e) {
        s += stringify(e, language);
      });
      return s;
    }

    var env = {
      type: o.type,
      content: stringify(o.content, language),
      tag: 'span',
      classes: ['token', o.type],
      attributes: {},
      language: language
    };

    var aliases = o.alias;
    if (aliases) {
      if (Array.isArray(aliases)) {
        Array.prototype.push.apply(env.classes, aliases);
      } else {
        env.classes.push(aliases);
      }
    }

    _.hooks.run('wrap', env);

    var attributes = '';
    for (var name in env.attributes) {
      // escape double quotes so that a value cannot terminate its own attribute
      attributes += ' ' + name + '="' + (env.attributes[name] || '').replace(/"/g, '&quot;') + '"';
    }

    return '<' + env.tag + ' class="' + env.classes.join(' ') + '"' + attributes + '>' + env.content + '</' + env.tag + '>';
  };

  /**
   * Executes `pattern` on `text` starting at `pos` and removes the Prism lookbehind group from the match.
   *
   * `pos` is applied through `lastIndex`, so it only has an effect on patterns with the global (or sticky) flag.
   *
   * @param {RegExp} pattern
   * @param {number} pos
   * @param {string} text
   * @param {boolean} lookbehind
   * @returns {RegExpExecArray | null}
   */
  function matchPattern(pattern, pos, text, lookbehind) {
    pattern.lastIndex = pos;
    var match = pattern.exec(text);
    if (match && lookbehind && match[1]) {
      // change the match to remove the text matched by the Prism lookbehind group
      var lookbehindLength = match[1].length;
      match.index += lookbehindLength;
      match[0] = match[0].slice(lookbehindLength);
    }
    return match;
  }

  /**
   * Matches the patterns of `grammar` against the strings in `tokenList` and replaces the matched parts with
   * {@link Token} objects. The list is modified in place.
   *
   * @param {string} text
   * @param {LinkedList<string | Token>} tokenList
   * @param {any} grammar
   * @param {LinkedListNode<string | Token>} startNode
   * @param {number} startPos
   * @param {RematchOptions} [rematch]
   * @returns {void}
   * @private
   *
   * @typedef RematchOptions
   * @property {string} cause
   * @property {number} reach
   */
  function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) {
    for (var token in grammar) {
      if (!grammar.hasOwnProperty(token) || !grammar[token]) {
        continue;
      }

      var patterns = grammar[token];
      patterns = Array.isArray(patterns) ? patterns : [patterns];

      for (var j = 0; j < patterns.length; ++j) {
        // when rematching, stop once the pattern that caused the rematch is reached
        if (rematch && rematch.cause == token + ',' + j) {
          return;
        }

        var patternObj = patterns[j];
        var inside = patternObj.inside;
        var lookbehind = !!patternObj.lookbehind;
        var greedy = !!patternObj.greedy;
        var alias = patternObj.alias;

        if (greedy && !patternObj.pattern.global) {
          // Without the global flag, lastIndex won't work
          var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0];
          patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g');
        }

        /** @type {RegExp} */
        var pattern = patternObj.pattern || patternObj;

        for (
          // iterate the token list and keep track of the current token/string position
          var currentNode = startNode.next, pos = startPos;
          currentNode !== tokenList.tail;
          pos += currentNode.value.length, currentNode = currentNode.next
        ) {
          if (rematch && pos >= rematch.reach) {
            break;
          }

          var str = currentNode.value;

          if (tokenList.length > text.length) {
            // Something went terribly wrong, ABORT, ABORT!
            return;
          }

          if (str instanceof Token) {
            continue;
          }

          var removeCount = 1; // the number of nodes to remove, i.e. the count parameter of removeRange
          var match;

          if (greedy) {
            match = matchPattern(pattern, pos, text, lookbehind);
            if (!match || match.index >= text.length) {
              break;
            }

            var from = match.index;
            var to = match.index + match[0].length;
            var p = pos;

            // find the node that contains the match
            p += currentNode.value.length;
            while (from >= p) {
              currentNode = currentNode.next;
              p += currentNode.value.length;
            }
            // adjust pos (and p)
            p -= currentNode.value.length;
            pos = p;

            // the current node is a Token, then the match starts inside another Token, which is invalid
            if (currentNode.value instanceof Token) {
              continue;
            }

            // find the last node which is affected by this match
            for (
              var k = currentNode;
              k !== tokenList.tail && (p < to || typeof k.value === 'string');
              k = k.next
            ) {
              removeCount++;
              p += k.value.length;
            }
            removeCount--;

            // replace with the new match
            str = text.slice(pos, p);
            match.index -= pos;
          } else {
            match = matchPattern(pattern, 0, str, lookbehind);
            if (!match) {
              continue;
            }
          }

          // eslint-disable-next-line no-redeclare
          var from = match.index;
          var matchStr = match[0];
          var before = str.slice(0, from);
          var after = str.slice(from + matchStr.length);

          var reach = pos + str.length;
          if (rematch && reach > rematch.reach) {
            rematch.reach = reach;
          }

          var removeFrom = currentNode.prev;

          if (before) {
            removeFrom = addAfter(tokenList, removeFrom, before);
            pos += before.length;
          }

          removeRange(tokenList, removeFrom, removeCount);

          var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr);
          currentNode = addAfter(tokenList, removeFrom, wrapped);

          if (after) {
            addAfter(tokenList, currentNode, after);
          }

          if (removeCount > 1) {
            // at least one Token object was removed, so we have to do some rematching
            // this can only happen if the current pattern is greedy

            /** @type {RematchOptions} */
            var nestedRematch = {
              cause: token + ',' + j,
              reach: reach
            };
            matchGrammar(text, tokenList, grammar, currentNode.prev, pos, nestedRematch);

            // the reach might have been extended because of the rematching
            if (rematch && nestedRematch.reach > rematch.reach) {
              rematch.reach = nestedRematch.reach;
            }
          }
        }
      }
    }
  }

  /**
   * @typedef LinkedListNode
   * @property {T} value
   * @property {LinkedListNode<T> | null} prev The previous node.
   * @property {LinkedListNode<T> | null} next The next node.
   * @template T
   * @private
   */

  /**
   * Creates an empty doubly linked list.
   *
   * `head` and `tail` are sentinel nodes without a value; the nodes of the list are located between them and
   * `length` is the number of those nodes.
   *
   * @template T
   * @private
   */
  function LinkedList() {
    /** @type {LinkedListNode<T>} */
    var head = { value: null, prev: null, next: null };
    /** @type {LinkedListNode<T>} */
    var tail = { value: null, prev: head, next: null };
    head.next = tail;

    /** @type {LinkedListNode<T>} */
    this.head = head;
    /** @type {LinkedListNode<T>} */
    this.tail = tail;
    this.length = 0;
  }

  /**
   * Adds a new node with the given value to the list, directly after the given node.
   *
   * @param {LinkedList<T>} list
   * @param {LinkedListNode<T>} node
   * @param {T} value
   * @returns {LinkedListNode<T>} The added node.
   * @template T
   */
  function addAfter(list, node, value) {
    // assumes that node != list.tail && values.length >= 0
    var next = node.next;

    var newNode = { value: value, prev: node, next: next };
    node.next = newNode;
    next.prev = newNode;
    list.length++;

    return newNode;
  }

  /**
   * Removes `count` nodes after the given node. The given node will not be removed.
   *
   * Fewer than `count` nodes are removed if the tail of the list is reached first.
   *
   * @param {LinkedList<T>} list
   * @param {LinkedListNode<T>} node
   * @param {number} count
   * @template T
   */
  function removeRange(list, node, count) {
    var next = node.next;
    for (var i = 0; i < count && next !== list.tail; i++) {
      next = next.next;
    }
    node.next = next;
    next.prev = node;
    // i is the number of nodes that were actually skipped
    list.length -= i;
  }

  /**
   * Returns the values of the given list as an array, in list order.
   *
   * @param {LinkedList<T>} list
   * @returns {T[]}
   * @template T
   */
  function toArray(list) {
    var array = [];
    var node = list.head.next;
    while (node !== list.tail) {
      array.push(node.value);
      node = node.next;
    }
    return array;
  }

  return _;
}());
// Make the core reachable through a `default` property as well as through
// the named export `p`.
Prism.default = Prism;
var prism = Prism;

export { prism as p };