1 | const Tokenizer = require('./Tokenizer.js');
|
2 | const { defaults } = require('./defaults.js');
|
3 | const { block, inline } = require('./rules.js');
|
4 | const { repeatString } = require('./helpers.js');
|
5 |
|
6 |
|
7 |
|
8 |
|
/**
 * Replaces plain ASCII punctuation in `text` with typographic equivalents:
 * dashes, curly single/double quotes and the ellipsis character.
 *
 * The substitutions are order-sensitive (e.g. `---` must be consumed before
 * `--`, opening quotes before the catch-all closing quotes), so they are
 * applied strictly in the order listed.
 *
 * @param {string} text Text to transform.
 * @returns {string} The transformed text.
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  let result = text;
  for (const [pattern, replacement] of substitutions) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
|
26 |
|
27 |
|
28 |
|
29 |
|
/**
 * Encodes every character of `text` as an HTML numeric character reference,
 * choosing at random (per character) between the decimal form `&#NN;` and
 * the hexadecimal form `&#xHH;`.
 *
 * Iteration is by Unicode code point rather than by UTF-16 code unit, so a
 * character outside the Basic Multilingual Plane produces one reference for
 * the whole code point instead of two references to its surrogate halves
 * (which are not valid character references).
 *
 * @param {string} text Text to obfuscate.
 * @returns {string} Concatenated numeric character references.
 */
function mangle(text) {
  let out = '';

  // for...of on a string yields whole code points (surrogate pairs stay together).
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    // One Math.random() call per character, as before, to pick the notation.
    const encoded = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : String(codePoint);
    out += '&#' + encoded + ';';
  }

  return out;
}
|
46 |
|
47 |
|
48 |
|
49 |
|
50 | module.exports = class Lexer {
|
51 | constructor(options) {
|
52 | this.tokens = [];
|
53 | this.tokens.links = Object.create(null);
|
54 | this.options = options || defaults;
|
55 | this.options.tokenizer = this.options.tokenizer || new Tokenizer();
|
56 | this.tokenizer = this.options.tokenizer;
|
57 | this.tokenizer.options = this.options;
|
58 |
|
59 | const rules = {
|
60 | block: block.normal,
|
61 | inline: inline.normal
|
62 | };
|
63 |
|
64 | if (this.options.pedantic) {
|
65 | rules.block = block.pedantic;
|
66 | rules.inline = inline.pedantic;
|
67 | } else if (this.options.gfm) {
|
68 | rules.block = block.gfm;
|
69 | if (this.options.breaks) {
|
70 | rules.inline = inline.breaks;
|
71 | } else {
|
72 | rules.inline = inline.gfm;
|
73 | }
|
74 | }
|
75 | this.tokenizer.rules = rules;
|
76 | }
|
77 |
|
78 | |
79 |
|
80 |
|
81 | static get rules() {
|
82 | return {
|
83 | block,
|
84 | inline
|
85 | };
|
86 | }
|
87 |
|
88 | |
89 |
|
90 |
|
91 | static lex(src, options) {
|
92 | const lexer = new Lexer(options);
|
93 | return lexer.lex(src);
|
94 | }
|
95 |
|
96 | |
97 |
|
98 |
|
99 | static lexInline(src, options) {
|
100 | const lexer = new Lexer(options);
|
101 | return lexer.inlineTokens(src);
|
102 | }
|
103 |
|
104 | |
105 |
|
106 |
|
107 | lex(src) {
|
108 | src = src
|
109 | .replace(/\r\n|\r/g, '\n')
|
110 | .replace(/\t/g, ' ');
|
111 |
|
112 | this.blockTokens(src, this.tokens, true);
|
113 |
|
114 | this.inline(this.tokens);
|
115 |
|
116 | return this.tokens;
|
117 | }
|
118 |
|
119 | |
120 |
|
121 |
|
122 | blockTokens(src, tokens = [], top = true) {
|
123 | if (this.options.pedantic) {
|
124 | src = src.replace(/^ +$/gm, '');
|
125 | }
|
126 | let token, i, l, lastToken, cutSrc, lastParagraphClipped;
|
127 |
|
128 | while (src) {
|
129 | if (this.options.extensions
|
130 | && this.options.extensions.block
|
131 | && this.options.extensions.block.some((extTokenizer) => {
|
132 | if (token = extTokenizer.call(this, src, tokens)) {
|
133 | src = src.substring(token.raw.length);
|
134 | tokens.push(token);
|
135 | return true;
|
136 | }
|
137 | return false;
|
138 | })) {
|
139 | continue;
|
140 | }
|
141 |
|
142 |
|
143 | if (token = this.tokenizer.space(src)) {
|
144 | src = src.substring(token.raw.length);
|
145 | if (token.type) {
|
146 | tokens.push(token);
|
147 | }
|
148 | continue;
|
149 | }
|
150 |
|
151 |
|
152 | if (token = this.tokenizer.code(src)) {
|
153 | src = src.substring(token.raw.length);
|
154 | lastToken = tokens[tokens.length - 1];
|
155 |
|
156 | if (lastToken && lastToken.type === 'paragraph') {
|
157 | lastToken.raw += '\n' + token.raw;
|
158 | lastToken.text += '\n' + token.text;
|
159 | } else {
|
160 | tokens.push(token);
|
161 | }
|
162 | continue;
|
163 | }
|
164 |
|
165 |
|
166 | if (token = this.tokenizer.fences(src)) {
|
167 | src = src.substring(token.raw.length);
|
168 | tokens.push(token);
|
169 | continue;
|
170 | }
|
171 |
|
172 |
|
173 | if (token = this.tokenizer.heading(src)) {
|
174 | src = src.substring(token.raw.length);
|
175 | tokens.push(token);
|
176 | continue;
|
177 | }
|
178 |
|
179 |
|
180 | if (token = this.tokenizer.nptable(src)) {
|
181 | src = src.substring(token.raw.length);
|
182 | tokens.push(token);
|
183 | continue;
|
184 | }
|
185 |
|
186 |
|
187 | if (token = this.tokenizer.hr(src)) {
|
188 | src = src.substring(token.raw.length);
|
189 | tokens.push(token);
|
190 | continue;
|
191 | }
|
192 |
|
193 |
|
194 | if (token = this.tokenizer.blockquote(src)) {
|
195 | src = src.substring(token.raw.length);
|
196 | token.tokens = this.blockTokens(token.text, [], top);
|
197 | tokens.push(token);
|
198 | continue;
|
199 | }
|
200 |
|
201 |
|
202 | if (token = this.tokenizer.list(src)) {
|
203 | src = src.substring(token.raw.length);
|
204 | l = token.items.length;
|
205 | for (i = 0; i < l; i++) {
|
206 | token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
|
207 | }
|
208 | tokens.push(token);
|
209 | continue;
|
210 | }
|
211 |
|
212 |
|
213 | if (token = this.tokenizer.html(src)) {
|
214 | src = src.substring(token.raw.length);
|
215 | tokens.push(token);
|
216 | continue;
|
217 | }
|
218 |
|
219 |
|
220 | if (top && (token = this.tokenizer.def(src))) {
|
221 | src = src.substring(token.raw.length);
|
222 | if (!this.tokens.links[token.tag]) {
|
223 | this.tokens.links[token.tag] = {
|
224 | href: token.href,
|
225 | title: token.title
|
226 | };
|
227 | }
|
228 | continue;
|
229 | }
|
230 |
|
231 |
|
232 | if (token = this.tokenizer.table(src)) {
|
233 | src = src.substring(token.raw.length);
|
234 | tokens.push(token);
|
235 | continue;
|
236 | }
|
237 |
|
238 |
|
239 | if (token = this.tokenizer.lheading(src)) {
|
240 | src = src.substring(token.raw.length);
|
241 | tokens.push(token);
|
242 | continue;
|
243 | }
|
244 |
|
245 |
|
246 |
|
247 | cutSrc = src;
|
248 | if (this.options.extensions && this.options.extensions.startBlock) {
|
249 | let startIndex = Infinity;
|
250 | const tempSrc = src.slice(1);
|
251 | let tempStart;
|
252 | this.options.extensions.startBlock.forEach(function(getStartIndex) {
|
253 | tempStart = getStartIndex.call(this, tempSrc);
|
254 | if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
|
255 | });
|
256 | if (startIndex < Infinity && startIndex >= 0) {
|
257 | cutSrc = src.substring(0, startIndex + 1);
|
258 | }
|
259 | }
|
260 | if (top && (token = this.tokenizer.paragraph(cutSrc))) {
|
261 | lastToken = tokens[tokens.length - 1];
|
262 | if (lastParagraphClipped && lastToken.type === 'paragraph') {
|
263 | lastToken.raw += '\n' + token.raw;
|
264 | lastToken.text += '\n' + token.text;
|
265 | } else {
|
266 | tokens.push(token);
|
267 | }
|
268 | lastParagraphClipped = (cutSrc.length !== src.length);
|
269 | src = src.substring(token.raw.length);
|
270 | continue;
|
271 | }
|
272 |
|
273 |
|
274 | if (token = this.tokenizer.text(src)) {
|
275 | src = src.substring(token.raw.length);
|
276 | lastToken = tokens[tokens.length - 1];
|
277 | if (lastToken && lastToken.type === 'text') {
|
278 | lastToken.raw += '\n' + token.raw;
|
279 | lastToken.text += '\n' + token.text;
|
280 | } else {
|
281 | tokens.push(token);
|
282 | }
|
283 | continue;
|
284 | }
|
285 |
|
286 | if (src) {
|
287 | const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
|
288 | if (this.options.silent) {
|
289 | console.error(errMsg);
|
290 | break;
|
291 | } else {
|
292 | throw new Error(errMsg);
|
293 | }
|
294 | }
|
295 | }
|
296 |
|
297 | return tokens;
|
298 | }
|
299 |
|
300 | inline(tokens) {
|
301 | let i,
|
302 | j,
|
303 | k,
|
304 | l2,
|
305 | row,
|
306 | token;
|
307 |
|
308 | const l = tokens.length;
|
309 | for (i = 0; i < l; i++) {
|
310 | token = tokens[i];
|
311 | switch (token.type) {
|
312 | case 'paragraph':
|
313 | case 'text':
|
314 | case 'heading': {
|
315 | token.tokens = [];
|
316 | this.inlineTokens(token.text, token.tokens);
|
317 | break;
|
318 | }
|
319 | case 'table': {
|
320 | token.tokens = {
|
321 | header: [],
|
322 | cells: []
|
323 | };
|
324 |
|
325 |
|
326 | l2 = token.header.length;
|
327 | for (j = 0; j < l2; j++) {
|
328 | token.tokens.header[j] = [];
|
329 | this.inlineTokens(token.header[j], token.tokens.header[j]);
|
330 | }
|
331 |
|
332 |
|
333 | l2 = token.cells.length;
|
334 | for (j = 0; j < l2; j++) {
|
335 | row = token.cells[j];
|
336 | token.tokens.cells[j] = [];
|
337 | for (k = 0; k < row.length; k++) {
|
338 | token.tokens.cells[j][k] = [];
|
339 | this.inlineTokens(row[k], token.tokens.cells[j][k]);
|
340 | }
|
341 | }
|
342 |
|
343 | break;
|
344 | }
|
345 | case 'blockquote': {
|
346 | this.inline(token.tokens);
|
347 | break;
|
348 | }
|
349 | case 'list': {
|
350 | l2 = token.items.length;
|
351 | for (j = 0; j < l2; j++) {
|
352 | this.inline(token.items[j].tokens);
|
353 | }
|
354 | break;
|
355 | }
|
356 | default: {
|
357 |
|
358 | }
|
359 | }
|
360 | }
|
361 |
|
362 | return tokens;
|
363 | }
|
364 |
|
365 | |
366 |
|
367 |
|
368 | inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
|
369 | let token, lastToken, cutSrc;
|
370 |
|
371 |
|
372 | let maskedSrc = src;
|
373 | let match;
|
374 | let keepPrevChar, prevChar;
|
375 |
|
376 |
|
377 | if (this.tokens.links) {
|
378 | const links = Object.keys(this.tokens.links);
|
379 | if (links.length > 0) {
|
380 | while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
|
381 | if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
|
382 | maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
|
383 | }
|
384 | }
|
385 | }
|
386 | }
|
387 |
|
388 | while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
|
389 | maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
|
390 | }
|
391 |
|
392 |
|
393 | while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
|
394 | maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
|
395 | }
|
396 |
|
397 | while (src) {
|
398 | if (!keepPrevChar) {
|
399 | prevChar = '';
|
400 | }
|
401 | keepPrevChar = false;
|
402 |
|
403 |
|
404 | if (this.options.extensions
|
405 | && this.options.extensions.inline
|
406 | && this.options.extensions.inline.some((extTokenizer) => {
|
407 | if (token = extTokenizer.call(this, src, tokens)) {
|
408 | src = src.substring(token.raw.length);
|
409 | tokens.push(token);
|
410 | return true;
|
411 | }
|
412 | return false;
|
413 | })) {
|
414 | continue;
|
415 | }
|
416 |
|
417 |
|
418 | if (token = this.tokenizer.escape(src)) {
|
419 | src = src.substring(token.raw.length);
|
420 | tokens.push(token);
|
421 | continue;
|
422 | }
|
423 |
|
424 |
|
425 | if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
|
426 | src = src.substring(token.raw.length);
|
427 | inLink = token.inLink;
|
428 | inRawBlock = token.inRawBlock;
|
429 | lastToken = tokens[tokens.length - 1];
|
430 | if (lastToken && token.type === 'text' && lastToken.type === 'text') {
|
431 | lastToken.raw += token.raw;
|
432 | lastToken.text += token.text;
|
433 | } else {
|
434 | tokens.push(token);
|
435 | }
|
436 | continue;
|
437 | }
|
438 |
|
439 |
|
440 | if (token = this.tokenizer.link(src)) {
|
441 | src = src.substring(token.raw.length);
|
442 | if (token.type === 'link') {
|
443 | token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
|
444 | }
|
445 | tokens.push(token);
|
446 | continue;
|
447 | }
|
448 |
|
449 |
|
450 | if (token = this.tokenizer.reflink(src, this.tokens.links)) {
|
451 | src = src.substring(token.raw.length);
|
452 | lastToken = tokens[tokens.length - 1];
|
453 | if (token.type === 'link') {
|
454 | token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
|
455 | tokens.push(token);
|
456 | } else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
|
457 | lastToken.raw += token.raw;
|
458 | lastToken.text += token.text;
|
459 | } else {
|
460 | tokens.push(token);
|
461 | }
|
462 | continue;
|
463 | }
|
464 |
|
465 |
|
466 | if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
|
467 | src = src.substring(token.raw.length);
|
468 | token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
469 | tokens.push(token);
|
470 | continue;
|
471 | }
|
472 |
|
473 |
|
474 | if (token = this.tokenizer.codespan(src)) {
|
475 | src = src.substring(token.raw.length);
|
476 | tokens.push(token);
|
477 | continue;
|
478 | }
|
479 |
|
480 |
|
481 | if (token = this.tokenizer.br(src)) {
|
482 | src = src.substring(token.raw.length);
|
483 | tokens.push(token);
|
484 | continue;
|
485 | }
|
486 |
|
487 |
|
488 | if (token = this.tokenizer.del(src)) {
|
489 | src = src.substring(token.raw.length);
|
490 | token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
491 | tokens.push(token);
|
492 | continue;
|
493 | }
|
494 |
|
495 |
|
496 | if (token = this.tokenizer.autolink(src, mangle)) {
|
497 | src = src.substring(token.raw.length);
|
498 | tokens.push(token);
|
499 | continue;
|
500 | }
|
501 |
|
502 |
|
503 | if (!inLink && (token = this.tokenizer.url(src, mangle))) {
|
504 | src = src.substring(token.raw.length);
|
505 | tokens.push(token);
|
506 | continue;
|
507 | }
|
508 |
|
509 |
|
510 |
|
511 | cutSrc = src;
|
512 | if (this.options.extensions && this.options.extensions.startInline) {
|
513 | let startIndex = Infinity;
|
514 | const tempSrc = src.slice(1);
|
515 | let tempStart;
|
516 | this.options.extensions.startInline.forEach(function(getStartIndex) {
|
517 | tempStart = getStartIndex.call(this, tempSrc);
|
518 | if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
|
519 | });
|
520 | if (startIndex < Infinity && startIndex >= 0) {
|
521 | cutSrc = src.substring(0, startIndex + 1);
|
522 | }
|
523 | }
|
524 | if (token = this.tokenizer.inlineText(cutSrc, inRawBlock, smartypants)) {
|
525 | src = src.substring(token.raw.length);
|
526 | if (token.raw.slice(-1) !== '_') {
|
527 | prevChar = token.raw.slice(-1);
|
528 | }
|
529 | keepPrevChar = true;
|
530 | lastToken = tokens[tokens.length - 1];
|
531 | if (lastToken && lastToken.type === 'text') {
|
532 | lastToken.raw += token.raw;
|
533 | lastToken.text += token.text;
|
534 | } else {
|
535 | tokens.push(token);
|
536 | }
|
537 | continue;
|
538 | }
|
539 |
|
540 | if (src) {
|
541 | const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
|
542 | if (this.options.silent) {
|
543 | console.error(errMsg);
|
544 | break;
|
545 | } else {
|
546 | throw new Error(errMsg);
|
547 | }
|
548 | }
|
549 | }
|
550 |
|
551 | return tokens;
|
552 | }
|
553 | };
|