1 | ;
|
2 |
|
3 | /**
|
4 | * Prism: Lightweight, robust, elegant syntax highlighting
|
5 | *
|
6 | * @license MIT <https://opensource.org/licenses/MIT>
|
7 | * @author Lea Verou <https://lea.verou.me>
|
8 | * @namespace
|
9 | * @public
|
10 | */
|
11 | /**
|
12 | * prism-react-renderer:
|
13 | * This file has been modified to remove:
|
14 | * - globals and window dependency
|
15 | * - worker support
|
16 | * - highlightAll and other element dependent methods
|
17 | * - _.hooks helpers
|
18 | * - UMD/node-specific hacks
|
19 | * It has also been run through prettier
|
20 | */
|
21 |
|
22 | var Prism = (function () {
|
23 |
|
// Private helper state shared by the functions below.

// The (single, shared) grammar object used for every plain-text language alias.
var plainTextGrammar = {};

// Counter backing `_.util.objId`; incremented each time a new object is tagged.
var uniqueId = 0;

// Matches a `lang-xxxx` or `language-xxxx` class name inside a class string.
// Capture group 1 holds the language id.
var lang = /(?:^|\s)lang(?:uage)?-([\w-]+)(?=\s|$)/i;
|
30 |
|
31 |
|
var _ = {
  /**
   * A namespace for utility methods.
   *
   * All functions in this namespace that are not explicitly marked as _public_ are for __internal use only__ and
   * may change or disappear at any time.
   *
   * @namespace
   * @memberof Prism
   */
  util: {
    /**
     * HTML-escapes the text of a string, token or token stream.
     *
     * `&` becomes `&amp;`, `<` becomes `&lt;` and non-breaking spaces (U+00A0) become regular spaces.
     * Tokens and arrays are processed recursively; new tokens/arrays are returned and the input is left untouched.
     *
     * @param {string | Token | TokenStream} tokens
     * @returns {string | Token | TokenStream}
     */
    encode: function encode(tokens) {
      if (tokens instanceof Token) {
        return new Token(tokens.type, encode(tokens.content), tokens.alias);
      } else if (Array.isArray(tokens)) {
        return tokens.map(encode);
      } else {
        // `&` has to be escaped first, otherwise the `&` of `&lt;` would be escaped again.
        return tokens
          .replace(/&/g, '&amp;')
          .replace(/</g, '&lt;')
          .replace(/\u00a0/g, ' ');
      }
    },

    /**
     * Returns the name of the type of the given value.
     *
     * @param {any} o
     * @returns {string}
     * @example
     * type(null)      === 'Null'
     * type(undefined) === 'Undefined'
     * type(123)       === 'Number'
     * type('foo')     === 'String'
     * type(true)      === 'Boolean'
     * type([1, 2])    === 'Array'
     * type({})        === 'Object'
     * type(String)    === 'Function'
     * type(/abc+/)    === 'RegExp'
     */
    type: function (o) {
      // "[object Xyz]" -> "Xyz"
      return Object.prototype.toString.call(o).slice(8, -1);
    },

    /**
     * Returns a unique number for the given object. Later calls will still return the same number.
     *
     * The number is stored on the object itself in a non-enumerable `__id` property.
     *
     * @param {Object} obj
     * @returns {number}
     */
    objId: function (obj) {
      if (!obj['__id']) {
        Object.defineProperty(obj, '__id', { value: ++uniqueId });
      }
      return obj['__id'];
    },

    /**
     * Creates a deep clone of the given object.
     *
     * The main intended use of this function is to clone language definitions. Only plain objects and arrays
     * are copied; every other value (e.g. regular expressions, functions, primitives) is returned as is.
     * Circular references are preserved via the `visited` map.
     *
     * @param {T} o
     * @param {Record<number, any>} [visited] Maps object ids to the clones already created for them.
     * @returns {T}
     * @template T
     */
    clone: function deepClone(o, visited) {
      visited = visited || {};

      var clone;
      var id;
      switch (_.util.type(o)) {
        case 'Object':
          id = _.util.objId(o);
          if (visited[id]) {
            return visited[id];
          }
          clone = /** @type {Record<string, any>} */ ({});
          // register the clone before recursing so that cycles resolve to it
          visited[id] = clone;

          for (var key in o) {
            if (Object.prototype.hasOwnProperty.call(o, key)) {
              clone[key] = deepClone(o[key], visited);
            }
          }

          return /** @type {any} */ (clone);

        case 'Array':
          id = _.util.objId(o);
          if (visited[id]) {
            return visited[id];
          }
          clone = [];
          visited[id] = clone;

          (/** @type {Array} */ (/** @type {any} */ (o))).forEach(function (v, i) {
            clone[i] = deepClone(v, visited);
          });

          return /** @type {any} */ (clone);

        default:
          return o;
      }
    },

    /**
     * Returns the Prism language of the given element set by a `language-xxxx` or `lang-xxxx` class.
     *
     * The element and its ancestors are searched in order. If no language is set for the element or the element
     * is `null` or `undefined`, `none` will be returned.
     *
     * @param {Element} element
     * @returns {string} The lower-cased language id.
     */
    getLanguage: function (element) {
      while (element) {
        var m = lang.exec(element.className);
        if (m) {
          return m[1].toLowerCase();
        }
        element = element.parentElement;
      }
      return 'none';
    },

    /**
     * Sets the Prism `language-xxxx` class of the given element.
     *
     * @param {Element} element
     * @param {string} language
     * @returns {void}
     */
    setLanguage: function (element, language) {
      // remove all `language-xxxx` classes
      // (this might leave behind a leading space)
      element.className = element.className.replace(RegExp(lang, 'gi'), '');

      // add the new `language-xxxx` class
      // (using `classList` will automatically clean up spaces for us)
      element.classList.add('language-' + language);
    },

    /**
     * Returns whether a given class is active for `element`.
     *
     * The class can be activated if `element` or one of its ancestors has the given class and it can be
     * deactivated if `element` or one of its ancestors has the negated version of the given class. The _negated
     * version_ of the given class is just the given class with a `no-` prefix.
     *
     * Whether the class is active is determined by the closest ancestor of `element` (where `element` itself is
     * the closest ancestor) that has the given class or the negated version of it. If neither `element` nor any
     * of its ancestors have the given class or the negated version of it, then the default activation will be
     * returned.
     *
     * In the paradoxical situation where the closest ancestor contains __both__ the given class and the negated
     * version of it, the class is considered active.
     *
     * @param {Element} element
     * @param {string} className
     * @param {boolean} [defaultActivation=false]
     * @returns {boolean}
     */
    isActive: function (element, className, defaultActivation) {
      var no = 'no-' + className;

      while (element) {
        var classList = element.classList;
        if (classList.contains(className)) {
          return true;
        }
        if (classList.contains(no)) {
          return false;
        }
        element = element.parentElement;
      }
      return !!defaultActivation;
    }
  },

  /**
   * This namespace contains all currently loaded languages and some helper functions to create and modify
   * languages.
   *
   * @namespace
   * @memberof Prism
   * @public
   */
  languages: {
    /**
     * The grammar for plain, unformatted text.
     */
    plain: plainTextGrammar,
    plaintext: plainTextGrammar,
    text: plainTextGrammar,
    txt: plainTextGrammar,

    /**
     * Creates a deep copy of the language with the given id and appends the given tokens.
     *
     * If a token in `redef` also appears in the copied language, then the existing token in the copied language
     * will be overwritten at its original position.
     *
     * ## Best practices
     *
     * Since the position of overwriting tokens (tokens in `redef` that overwrite tokens in the copied language)
     * doesn't matter, they can technically be in any order. However, this can be confusing to others who are
     * trying to understand the language definition because, normally, the order of tokens matters in Prism
     * grammars.
     *
     * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten
     * tokens. Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
     *
     * @param {string} id The id of the language to extend. This has to be a key in `Prism.languages`.
     * @param {Grammar} redef The new tokens to append.
     * @returns {Grammar} The new language created.
     * @public
     * @example
     * Prism.languages['css-with-colors'] = Prism.languages.extend('css', {
     *     // Prism.languages.css already has a 'comment' token, so this token will overwrite CSS' 'comment' token
     *     // at its original position
     *     'comment': { ... },
     *     // CSS doesn't have a 'color' token, so this token will be appended
     *     'color': /\b(?:red|green|blue)\b/
     * });
     */
    extend: function (id, redef) {
      var lang = _.util.clone(_.languages[id]);

      for (var key in redef) {
        lang[key] = redef[key];
      }

      return lang;
    },

    /**
     * Inserts tokens _before_ another token in a language definition or any other grammar.
     *
     * ## Usage
     *
     * This helper method makes it easy to modify existing languages. For example, the CSS language definition
     * not only defines CSS highlighting for CSS documents, but also needs to define highlighting for CSS embedded
     * in HTML through `<style>` elements. To do this, it needs to modify `Prism.languages.markup` and add the
     * appropriate tokens. However, `Prism.languages.markup` is a regular JavaScript object literal, so if you do
     * this:
     *
     * ```js
     * Prism.languages.markup.style = {
     *     // token
     * };
     * ```
     *
     * then the `style` token will be added (and processed) at the end. `insertBefore` allows you to insert tokens
     * before existing tokens. For the CSS example above, you would use it like this:
     *
     * ```js
     * Prism.languages.insertBefore('markup', 'cdata', {
     *     'style': {
     *         // token
     *     }
     * });
     * ```
     *
     * ## Special cases
     *
     * If the grammars of `inside` and `insert` have tokens with the same name, the tokens in `inside`'s grammar
     * will be ignored.
     *
     * This behavior can be used to insert tokens after `before`:
     *
     * ```js
     * Prism.languages.insertBefore('markup', 'comment', {
     *     'comment': Prism.languages.markup.comment,
     *     // tokens after 'comment'
     * });
     * ```
     *
     * ## Limitations
     *
     * The main problem `insertBefore` has to solve is iteration order. Since ES2015, the iteration order for
     * object properties is guaranteed to be the insertion order (except for integer keys) but some browsers
     * behave differently when keys are deleted and re-inserted. So `insertBefore` can't be implemented by
     * temporarily deleting properties which is necessary to insert at arbitrary positions.
     *
     * To solve this problem, `insertBefore` doesn't actually insert the given tokens into the target object.
     * Instead, it will create a new object and replace all references to the target object with the new one.
     * This can be done without temporarily deleting properties, so the iteration order is well-defined.
     *
     * However, only references that can be reached from `Prism.languages` or `insert` will be replaced. I.e. if
     * you hold the target object in a variable, then the value of the variable will not change.
     *
     * ```js
     * var oldMarkup = Prism.languages.markup;
     * var newMarkup = Prism.languages.insertBefore('markup', 'comment', { ... });
     *
     * assert(oldMarkup !== Prism.languages.markup);
     * assert(newMarkup === Prism.languages.markup);
     * ```
     *
     * @param {string} inside The property of `root` (e.g. a language id in `Prism.languages`) that contains the
     * object to be modified.
     * @param {string} before The key to insert before.
     * @param {Grammar} insert An object containing the key-value pairs to be inserted.
     * @param {Object<string, any>} [root] The object containing `inside`, i.e. the object that contains the
     * object to be modified.
     *
     * Defaults to `Prism.languages`.
     * @returns {Grammar} The new grammar object.
     * @public
     */
    insertBefore: function (inside, before, insert, root) {
      root = root || /** @type {any} */ (_.languages);
      var grammar = root[inside];
      /** @type {Grammar} */
      var ret = {};

      for (var token in grammar) {
        if (Object.prototype.hasOwnProperty.call(grammar, token)) {

          if (token == before) {
            for (var newToken in insert) {
              if (Object.prototype.hasOwnProperty.call(insert, newToken)) {
                ret[newToken] = insert[newToken];
              }
            }
          }

          // Do not insert token which also occur in insert. See #1525
          if (!Object.prototype.hasOwnProperty.call(insert, token)) {
            ret[token] = grammar[token];
          }
        }
      }

      var old = root[inside];
      root[inside] = ret;

      // Update references in other language definitions
      _.languages.DFS(_.languages, function (key, value) {
        if (value === old && key != inside) {
          this[key] = ret;
        }
      });

      return ret;
    },

    /**
     * Traverses a language definition with Depth First Search.
     *
     * `callback` is invoked for every own enumerable property with `this` set to the object that owns the
     * property and the arguments `(key, value, type)`, where `type` is the `type` argument of the current call
     * or, if that is falsy, the key itself. Nested plain objects are traversed with `type` reset to `null`;
     * nested arrays are traversed with `type` set to the key of the array. Each object/array is entered at
     * most once.
     *
     * @param {Object<string, any>} o The object to traverse.
     * @param {function(this: any, string, any, string): void} callback
     * @param {string | null} [type]
     * @param {Record<number, boolean>} [visited] Ids (see `util.objId`) of the objects already entered.
     * @returns {void}
     */
    DFS: function DFS(o, callback, type, visited) {
      visited = visited || {};

      var objId = _.util.objId;

      for (var i in o) {
        if (Object.prototype.hasOwnProperty.call(o, i)) {
          callback.call(o, i, o[i], type || i);

          // re-read the property: the callback may have replaced it
          var property = o[i];
          var propertyType = _.util.type(property);

          if (propertyType === 'Object' && !visited[objId(property)]) {
            visited[objId(property)] = true;
            DFS(property, callback, null, visited);
          } else if (propertyType === 'Array' && !visited[objId(property)]) {
            visited[objId(property)] = true;
            DFS(property, callback, i, visited);
          }
        }
      }
    }
  },

  plugins: {},

  /**
   * Low-level function, only use if you know what you’re doing. It accepts a string of text as input
   * and the language definitions to use, and returns a string with the HTML produced.
   *
   * The following hooks will be run:
   * 1. `before-tokenize`
   * 2. `after-tokenize`
   * 3. `wrap`: On each {@link Token}.
   *
   * @param {string} text A string with the code to be highlighted.
   * @param {Grammar} grammar An object containing the tokens to use.
   *
   * Usually a language definition like `Prism.languages.markup`.
   * @param {string} language The name of the language definition passed to `grammar`.
   * @returns {string} The highlighted HTML.
   * @memberof Prism
   * @public
   * @example
   * Prism.highlight('var foo = true;', Prism.languages.javascript, 'javascript');
   */
  highlight: function (text, grammar, language) {
    var env = {
      code: text,
      grammar: grammar,
      language: language
    };
    _.hooks.run('before-tokenize', env);
    env.tokens = _.tokenize(env.code, env.grammar);
    _.hooks.run('after-tokenize', env);
    return Token.stringify(_.util.encode(env.tokens), env.language);
  },

  /**
   * This is the heart of Prism, and the most low-level function you can use. It accepts a string of text as
   * input and the language definitions to use, and returns an array with the tokenized code.
   *
   * When the language definition includes nested tokens, the function is called recursively on each of these
   * tokens.
   *
   * This method could be useful in other contexts as well, as a very crude parser.
   *
   * Note that a `rest` property of `grammar` is merged into `grammar` itself and then deleted, i.e. the given
   * grammar object is modified.
   *
   * @param {string} text A string with the code to be highlighted.
   * @param {Grammar} grammar An object containing the tokens to use.
   *
   * Usually a language definition like `Prism.languages.markup`.
   * @returns {TokenStream} An array of strings and tokens, a token stream.
   * @memberof Prism
   * @public
   * @example
   * let code = `var foo = 0;`;
   * let tokens = Prism.tokenize(code, Prism.languages.javascript);
   * tokens.forEach(token => {
   *     if (token instanceof Prism.Token && token.type === 'number') {
   *         console.log(`Found numeric literal: ${token.content}`);
   *     }
   * });
   */
  tokenize: function (text, grammar) {
    var rest = grammar.rest;
    if (rest) {
      for (var token in rest) {
        grammar[token] = rest[token];
      }

      delete grammar.rest;
    }

    // start with a list that holds the whole text as a single string node
    var tokenList = new LinkedList();
    addAfter(tokenList, tokenList.head, text);

    matchGrammar(text, tokenList, grammar, tokenList.head, 0);

    return toArray(tokenList);
  },

  /**
   * @namespace
   * @memberof Prism
   * @public
   */
  hooks: {
    all: {},

    /**
     * Adds the given callback to the list of callbacks for the given hook.
     *
     * The callback will be invoked when the hook it is registered for is run.
     * Hooks are usually directly run by a highlight function but you can also run hooks yourself.
     *
     * One callback function can be registered to multiple hooks and the same hook multiple times.
     *
     * @param {string} name The name of the hook.
     * @param {HookCallback} callback The callback function which is given environment variables.
     * @public
     */
    add: function (name, callback) {
      var hooks = _.hooks.all;

      hooks[name] = hooks[name] || [];

      hooks[name].push(callback);
    },

    /**
     * Runs a hook invoking all registered callbacks with the given environment variables.
     *
     * Callbacks will be invoked synchronously and in the order in which they were registered.
     *
     * @param {string} name The name of the hook.
     * @param {Object<string, any>} env The environment variables of the hook passed to all callbacks registered.
     * @public
     */
    run: function (name, env) {
      var callbacks = _.hooks.all[name];

      if (!callbacks || !callbacks.length) {
        return;
      }

      for (var i = 0, callback; (callback = callbacks[i++]);) {
        callback(env);
      }
    }
  },

  Token: Token
};
|
528 |
|
529 |
|
530 | // Typescript note:
|
531 | // The following can be used to import the Token type in JSDoc:
|
532 | //
|
533 | // @typedef {InstanceType<import("./prism-core")["Token"]>} Token
|
534 |
|
/**
 * Creates a new token.
 *
 * @param {string} type See {@link Token#type type}
 * @param {string | TokenStream} content See {@link Token#content content}
 * @param {string|string[]} [alias] The alias(es) of the token.
 * @param {string} [matchedStr=""] A copy of the full string this token was created from.
 * @class
 * @global
 * @public
 */
function Token(type, content, alias, matchedStr) {
  // Only the length of the matched string is kept, not the string itself.
  var source = matchedStr || '';

  /**
   * The type of the token.
   *
   * This is usually the key of a pattern in a {@link Grammar}.
   *
   * @type {string}
   * @see GrammarToken
   * @public
   */
  this.type = type;

  /**
   * The strings or tokens contained by this token.
   *
   * This will be a token stream if the pattern matched also defined an `inside` grammar.
   *
   * @type {string | TokenStream}
   * @public
   */
  this.content = content;

  /**
   * The alias(es) of the token.
   *
   * @type {string|string[]}
   * @see GrammarToken
   * @public
   */
  this.alias = alias;

  /**
   * The length of the string this token was created from (0 if none was given).
   *
   * @type {number}
   */
  this.length = source.length | 0;
}
|
577 |
|
578 | /**
|
579 | * A token stream is an array of strings and {@link Token Token} objects.
|
580 | *
|
581 | * Token streams have to fulfill a few properties that are assumed by most functions (mostly internal ones) that process
|
582 | * them.
|
583 | *
|
584 | * 1. No adjacent strings.
|
585 | * 2. No empty strings.
|
586 | *
|
587 | * The only exception here is the token stream that only contains the empty string and nothing else.
|
588 | *
|
589 | * @typedef {Array<string | Token>} TokenStream
|
590 | * @global
|
591 | * @public
|
592 | */
|
593 |
|
/**
 * Converts the given token or token stream to an HTML representation.
 *
 * Strings are returned unchanged (they are expected to be HTML-encoded already), arrays are stringified element
 * by element and concatenated, and each {@link Token} becomes an element (a `span` unless a hook changes
 * `env.tag`) carrying the classes `token`, the token type and all aliases.
 *
 * The following hooks will be run:
 * 1. `wrap`: On each {@link Token}. The hook may modify `tag`, `classes`, `attributes` and `content` of `env`.
 *
 * @param {string | Token | TokenStream} o The token or token stream to be converted.
 * @param {string} language The name of current language.
 * @returns {string} The HTML representation of the token or token stream.
 * @memberof Token
 * @static
 */
Token.stringify = function stringify(o, language) {
  if (typeof o == 'string') {
    return o;
  }
  if (Array.isArray(o)) {
    var s = '';
    o.forEach(function (e) {
      s += stringify(e, language);
    });
    return s;
  }

  var env = {
    type: o.type,
    content: stringify(o.content, language),
    tag: 'span',
    classes: ['token', o.type],
    attributes: {},
    language: language
  };

  var aliases = o.alias;
  if (aliases) {
    if (Array.isArray(aliases)) {
      Array.prototype.push.apply(env.classes, aliases);
    } else {
      env.classes.push(aliases);
    }
  }

  _.hooks.run('wrap', env);

  var attributes = '';
  for (var name in env.attributes) {
    // Attribute values are emitted inside double quotes, so embedded double quotes have to be escaped.
    attributes += ' ' + name + '="' + (env.attributes[name] || '').replace(/"/g, '&quot;') + '"';
  }

  return '<' + env.tag + ' class="' + env.classes.join(' ') + '"' + attributes + '>' + env.content + '</' + env.tag + '>';
};
|
645 |
|
/**
 * Executes `pattern` on `text`, starting the search at `pos` (this only has an effect for patterns with the
 * global or sticky flag, since it is done by setting `lastIndex`).
 *
 * If `lookbehind` is set and the first capture group matched a non-empty string, that group is treated as
 * Prism's emulated lookbehind: it is cut off the front of the match, i.e. `match[0]` is shortened and
 * `match.index` is moved forward accordingly.
 *
 * @param {RegExp} pattern
 * @param {number} pos
 * @param {string} text
 * @param {boolean} lookbehind
 * @returns {RegExpExecArray | null}
 */
function matchPattern(pattern, pos, text, lookbehind) {
  pattern.lastIndex = pos;
  var result = pattern.exec(text);

  // nothing to adjust: no match, no lookbehind requested, or an empty/unmatched lookbehind group
  if (!result || !lookbehind || !result[1]) {
    return result;
  }

  var skipped = result[1].length;
  result.index += skipped;
  result[0] = result[0].slice(skipped);
  return result;
}
|
664 |
|
/**
 * Applies every pattern of `grammar` to the string nodes of `tokenList`, replacing matched text with
 * {@link Token} nodes. The list is modified in place; nothing is returned.
 *
 * For each match, the string node is replaced by up to three nodes: the text before the match (if any), a new
 * Token for the match, and the text after the match (if any). Patterns with an `inside` grammar have their
 * match tokenized recursively through `_.tokenize`.
 *
 * Non-greedy patterns are only matched against individual string nodes. Greedy patterns are matched against
 * the whole `text` starting at the current position, so a match may span several nodes, including existing
 * tokens. When that happens the spanned nodes are removed and the function calls itself with a `rematch`
 * object to re-run the earlier patterns on the affected region.
 *
 * @param {string} text The full text being tokenized.
 * @param {LinkedList<string | Token>} tokenList The list of strings and tokens covering `text`.
 * @param {any} grammar
 * @param {LinkedListNode<string | Token>} startNode The node after which processing starts.
 * @param {number} startPos The position in `text` of the node following `startNode`.
 * @param {RematchOptions} [rematch] Only set for the recursive rematch call.
 * @returns {void}
 * @private
 *
 * @typedef RematchOptions
 * @property {string} cause The `token + ',' + patternIndex` key of the pattern that triggered the rematch;
 * processing stops when this pattern is reached.
 * @property {number} reach The position in `text` up to which nodes are processed; it is extended when a
 * match ends beyond it.
 */
function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) {
  for (var token in grammar) {
    if (!grammar.hasOwnProperty(token) || !grammar[token]) {
      continue;
    }

    // a token may be defined by a single pattern or by an array of patterns
    var patterns = grammar[token];
    patterns = Array.isArray(patterns) ? patterns : [patterns];

    for (var j = 0; j < patterns.length; ++j) {
      // during a rematch, only the patterns before the one that caused it are applied
      if (rematch && rematch.cause == token + ',' + j) {
        return;
      }

      var patternObj = patterns[j];
      var inside = patternObj.inside;
      var lookbehind = !!patternObj.lookbehind;
      var greedy = !!patternObj.greedy;
      var alias = patternObj.alias;

      if (greedy && !patternObj.pattern.global) {
        // Without the global flag, lastIndex won't work
        // (the rebuilt regex is stored back on the pattern object, so this happens once per pattern)
        var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0];
        patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g');
      }

      // a pattern is either an object with a `pattern` property or a bare RegExp
      /** @type {RegExp} */
      var pattern = patternObj.pattern || patternObj;

      for ( // iterate the token list and keep track of the current token/string position
        var currentNode = startNode.next, pos = startPos;
        currentNode !== tokenList.tail;
        pos += currentNode.value.length, currentNode = currentNode.next
      ) {

        // during a rematch, stop once the affected region has been covered
        if (rematch && pos >= rematch.reach) {
          break;
        }

        var str = currentNode.value;

        if (tokenList.length > text.length) {
          // Something went terribly wrong, ABORT, ABORT!
          return;
        }

        // existing tokens are never re-matched by later patterns
        if (str instanceof Token) {
          continue;
        }

        var removeCount = 1; // number of nodes passed to removeRange below
        var match;

        if (greedy) {
          match = matchPattern(pattern, pos, text, lookbehind);
          if (!match || match.index >= text.length) {
            break;
          }

          // absolute start/end of the match within `text`
          var from = match.index;
          var to = match.index + match[0].length;
          var p = pos;

          // find the node that contains the match
          p += currentNode.value.length;
          while (from >= p) {
            currentNode = currentNode.next;
            p += currentNode.value.length;
          }
          // adjust pos (and p)
          p -= currentNode.value.length;
          pos = p;

          // the current node is a Token, then the match starts inside another Token, which is invalid
          if (currentNode.value instanceof Token) {
            continue;
          }

          // find the last node which is affected by this match
          for (
            var k = currentNode;
            k !== tokenList.tail && (p < to || typeof k.value === 'string');
            k = k.next
          ) {
            removeCount++;
            p += k.value.length;
          }
          removeCount--;

          // replace with the new match
          // (`str` now spans all affected nodes; `match.index` becomes relative to `str`)
          str = text.slice(pos, p);
          match.index -= pos;
        } else {
          match = matchPattern(pattern, 0, str, lookbehind);
          if (!match) {
            continue;
          }
        }

        // eslint-disable-next-line no-redeclare
        var from = match.index;
        var matchStr = match[0];
        var before = str.slice(0, from);
        var after = str.slice(from + matchStr.length);

        // end position (in `text`) of the region replaced by this match
        var reach = pos + str.length;
        if (rematch && reach > rematch.reach) {
          rematch.reach = reach;
        }

        var removeFrom = currentNode.prev;

        if (before) {
          removeFrom = addAfter(tokenList, removeFrom, before);
          pos += before.length;
        }

        removeRange(tokenList, removeFrom, removeCount);

        var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr);
        currentNode = addAfter(tokenList, removeFrom, wrapped);

        if (after) {
          addAfter(tokenList, currentNode, after);
        }

        if (removeCount > 1) {
          // at least one Token object was removed, so we have to do some rematching
          // this can only happen if the current pattern is greedy

          /** @type {RematchOptions} */
          var nestedRematch = {
            cause: token + ',' + j,
            reach: reach
          };
          matchGrammar(text, tokenList, grammar, currentNode.prev, pos, nestedRematch);

          // the reach might have been extended because of the rematching
          if (rematch && nestedRematch.reach > rematch.reach) {
            rematch.reach = nestedRematch.reach;
          }
        }
      }
    }
  }
}
|
825 |
|
826 | /**
|
827 | * @typedef LinkedListNode
|
828 | * @property {T} value
|
829 | * @property {LinkedListNode<T> | null} prev The previous node.
|
830 | * @property {LinkedListNode<T> | null} next The next node.
|
831 | * @template T
|
832 | * @private
|
833 | */
|
834 |
|
/**
 * A doubly linked list with two sentinel nodes.
 *
 * `head` and `tail` never hold a value; the actual elements are the nodes between them. A new list is empty,
 * i.e. `head.next` is `tail`, `tail.prev` is `head` and `length` is 0.
 *
 * @template T
 * @private
 */
function LinkedList() {
  /** @type {LinkedListNode<T>} */
  var first = { value: null, prev: null, next: null };
  /** @type {LinkedListNode<T>} */
  var last = { value: null, prev: first, next: null };
  first.next = last;

  /**
   * Sentinel node in front of the first element.
   *
   * @type {LinkedListNode<T>}
   */
  this.head = first;
  /**
   * Sentinel node behind the last element.
   *
   * @type {LinkedListNode<T>}
   */
  this.tail = last;
  /** The number of elements (sentinels excluded). */
  this.length = 0;
}
|
852 |
|
/**
 * Inserts a new node holding `value` directly behind `node` and increments the list's length.
 *
 * `node` must have a successor, i.e. it must not be the list's tail.
 *
 * @param {LinkedList<T>} list
 * @param {LinkedListNode<T>} node The node after which the new node is inserted.
 * @param {T} value
 * @returns {LinkedListNode<T>} The added node.
 * @template T
 */
function addAfter(list, node, value) {
  var successor = node.next;
  var inserted = { value: value, prev: node, next: successor };

  // splice the new node in between `node` and its former successor
  successor.prev = inserted;
  node.next = inserted;
  list.length += 1;

  return inserted;
}
|
/**
 * Removes up to `count` nodes after the given node. The given node will not be removed.
 *
 * Removal stops early at the list's tail, which is never removed. The list's length is reduced by the number
 * of nodes that were actually removed.
 *
 * @param {LinkedList<T>} list
 * @param {LinkedListNode<T>} node The node after which nodes are removed.
 * @param {number} count
 * @template T
 */
function removeRange(list, node, count) {
  var survivor = node.next;
  var removed = 0;

  // skip over the nodes to drop; `survivor` ends up on the first node that is kept
  while (removed < count && survivor !== list.tail) {
    survivor = survivor.next;
    removed += 1;
  }

  node.next = survivor;
  survivor.prev = node;
  list.length -= removed;
}
|
/**
 * Collects the values of all nodes between the list's head and tail sentinels, in list order.
 *
 * @param {LinkedList<T>} list
 * @returns {T[]}
 * @template T
 */
function toArray(list) {
  var values = [];
  for (var current = list.head.next; current !== list.tail; current = current.next) {
    values.push(current.value);
  }
  return values;
}
|
904 |
|
905 | return _;
|
906 |
|
907 | }());
|
908 |
|
// Module-level alias for the Prism object built above; this is the value that gets exported.
var prism = Prism;
// Self-reference under `default`.
// NOTE(review): presumably for default-import interop of the bundled module — confirm against the build setup.
Prism.default = Prism;

// CommonJS named export.
exports.prism = prism;
|