// Listing captured from the UNPKG file viewer (18.9 kB, JavaScript, "View Raw").
1const { defaults } = require('./defaults.js');
2const {
3 rtrim,
4 splitCells,
5 escape,
6 findClosingBracket
7} = require('./helpers.js');
8
9function outputLink(cap, link, raw) {
10 const href = link.href;
11 const title = link.title ? escape(link.title) : null;
12 const text = cap[1].replace(/\\([\[\]])/g, '$1');
13
14 if (cap[0].charAt(0) !== '!') {
15 return {
16 type: 'link',
17 raw,
18 href,
19 title,
20 text
21 };
22 } else {
23 return {
24 type: 'image',
25 raw,
26 href,
27 title,
28 text: escape(text)
29 };
30 }
31}
32
33function indentCodeCompensation(raw, text) {
34 const matchIndentToCode = raw.match(/^(\s+)(?:```)/);
35
36 if (matchIndentToCode === null) {
37 return text;
38 }
39
40 const indentToCode = matchIndentToCode[1];
41
42 return text
43 .split('\n')
44 .map(node => {
45 const matchIndentInNode = node.match(/^\s+/);
46 if (matchIndentInNode === null) {
47 return node;
48 }
49
50 const [indentInNode] = matchIndentInNode;
51
52 if (indentInNode.length >= indentToCode.length) {
53 return node.slice(indentToCode.length);
54 }
55
56 return node;
57 })
58 .join('\n');
59}
60
61/**
62 * Tokenizer
63 */
64module.exports = class Tokenizer {
65 constructor(options) {
66 this.options = options || defaults;
67 }
68
69 space(src) {
70 const cap = this.rules.block.newline.exec(src);
71 if (cap) {
72 if (cap[0].length > 1) {
73 return {
74 type: 'space',
75 raw: cap[0]
76 };
77 }
78 return { raw: '\n' };
79 }
80 }
81
82 code(src) {
83 const cap = this.rules.block.code.exec(src);
84 if (cap) {
85 const text = cap[0].replace(/^ {1,4}/gm, '');
86 return {
87 type: 'code',
88 raw: cap[0],
89 codeBlockStyle: 'indented',
90 text: !this.options.pedantic
91 ? rtrim(text, '\n')
92 : text
93 };
94 }
95 }
96
97 fences(src) {
98 const cap = this.rules.block.fences.exec(src);
99 if (cap) {
100 const raw = cap[0];
101 const text = indentCodeCompensation(raw, cap[3] || '');
102
103 return {
104 type: 'code',
105 raw,
106 lang: cap[2] ? cap[2].trim() : cap[2],
107 text
108 };
109 }
110 }
111
112 heading(src) {
113 const cap = this.rules.block.heading.exec(src);
114 if (cap) {
115 let text = cap[2].trim();
116
117 // remove trailing #s
118 if (/#$/.test(text)) {
119 const trimmed = rtrim(text, '#');
120 if (this.options.pedantic) {
121 text = trimmed.trim();
122 } else if (!trimmed || / $/.test(trimmed)) {
123 // CommonMark requires space before trailing #s
124 text = trimmed.trim();
125 }
126 }
127
128 return {
129 type: 'heading',
130 raw: cap[0],
131 depth: cap[1].length,
132 text: text
133 };
134 }
135 }
136
137 nptable(src) {
138 const cap = this.rules.block.nptable.exec(src);
139 if (cap) {
140 const item = {
141 type: 'table',
142 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
143 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
144 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
145 raw: cap[0]
146 };
147
148 if (item.header.length === item.align.length) {
149 let l = item.align.length;
150 let i;
151 for (i = 0; i < l; i++) {
152 if (/^ *-+: *$/.test(item.align[i])) {
153 item.align[i] = 'right';
154 } else if (/^ *:-+: *$/.test(item.align[i])) {
155 item.align[i] = 'center';
156 } else if (/^ *:-+ *$/.test(item.align[i])) {
157 item.align[i] = 'left';
158 } else {
159 item.align[i] = null;
160 }
161 }
162
163 l = item.cells.length;
164 for (i = 0; i < l; i++) {
165 item.cells[i] = splitCells(item.cells[i], item.header.length);
166 }
167
168 return item;
169 }
170 }
171 }
172
173 hr(src) {
174 const cap = this.rules.block.hr.exec(src);
175 if (cap) {
176 return {
177 type: 'hr',
178 raw: cap[0]
179 };
180 }
181 }
182
183 blockquote(src) {
184 const cap = this.rules.block.blockquote.exec(src);
185 if (cap) {
186 const text = cap[0].replace(/^ *> ?/gm, '');
187
188 return {
189 type: 'blockquote',
190 raw: cap[0],
191 text
192 };
193 }
194 }
195
196 list(src) {
197 const cap = this.rules.block.list.exec(src);
198 if (cap) {
199 let raw = cap[0];
200 const bull = cap[2];
201 const isordered = bull.length > 1;
202
203 const list = {
204 type: 'list',
205 raw,
206 ordered: isordered,
207 start: isordered ? +bull.slice(0, -1) : '',
208 loose: false,
209 items: []
210 };
211
212 // Get each top-level item.
213 const itemMatch = cap[0].match(this.rules.block.item);
214
215 let next = false,
216 item,
217 space,
218 bcurr,
219 bnext,
220 addBack,
221 loose,
222 istask,
223 ischecked,
224 endMatch;
225
226 let l = itemMatch.length;
227 bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
228 for (let i = 0; i < l; i++) {
229 item = itemMatch[i];
230 raw = item;
231
232 if (!this.options.pedantic) {
233 // Determine if current item contains the end of the list
234 endMatch = item.match(new RegExp('\\n\\s*\\n {0,' + (bcurr[0].length - 1) + '}\\S'));
235 if (endMatch) {
236 addBack = item.length - endMatch.index + itemMatch.slice(i + 1).join('\n').length;
237 list.raw = list.raw.substring(0, list.raw.length - addBack);
238
239 item = item.substring(0, endMatch.index);
240 raw = item;
241 l = i + 1;
242 }
243 }
244
245 // Determine whether the next list item belongs here.
246 // Backpedal if it does not belong in this list.
247 if (i !== l - 1) {
248 bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
249 if (
250 !this.options.pedantic
251 ? bnext[1].length >= bcurr[0].length || bnext[1].length > 3
252 : bnext[1].length > bcurr[1].length
253 ) {
254 // nested list or continuation
255 itemMatch.splice(i, 2, itemMatch[i] + (!this.options.pedantic && bnext[1].length < bcurr[0].length && !itemMatch[i].match(/\n$/) ? '' : '\n') + itemMatch[i + 1]);
256 i--;
257 l--;
258 continue;
259 } else if (
260 // different bullet style
261 !this.options.pedantic || this.options.smartLists
262 ? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
263 : isordered === (bnext[2].length === 1)
264 ) {
265 addBack = itemMatch.slice(i + 1).join('\n').length;
266 list.raw = list.raw.substring(0, list.raw.length - addBack);
267 i = l - 1;
268 }
269 bcurr = bnext;
270 }
271
272 // Remove the list item's bullet
273 // so it is seen as the next token.
274 space = item.length;
275 item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');
276
277 // Outdent whatever the
278 // list item contains. Hacky.
279 if (~item.indexOf('\n ')) {
280 space -= item.length;
281 item = !this.options.pedantic
282 ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
283 : item.replace(/^ {1,4}/gm, '');
284 }
285
286 // trim item newlines at end
287 item = rtrim(item, '\n');
288 if (i !== l - 1) {
289 raw = raw + '\n';
290 }
291
292 // Determine whether item is loose or not.
293 // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
294 // for discount behavior.
295 loose = next || /\n\n(?!\s*$)/.test(raw);
296 if (i !== l - 1) {
297 next = raw.slice(-2) === '\n\n';
298 if (!loose) loose = next;
299 }
300
301 if (loose) {
302 list.loose = true;
303 }
304
305 // Check for task list items
306 if (this.options.gfm) {
307 istask = /^\[[ xX]\] /.test(item);
308 ischecked = undefined;
309 if (istask) {
310 ischecked = item[1] !== ' ';
311 item = item.replace(/^\[[ xX]\] +/, '');
312 }
313 }
314
315 list.items.push({
316 type: 'list_item',
317 raw,
318 task: istask,
319 checked: ischecked,
320 loose: loose,
321 text: item
322 });
323 }
324
325 return list;
326 }
327 }
328
329 html(src) {
330 const cap = this.rules.block.html.exec(src);
331 if (cap) {
332 return {
333 type: this.options.sanitize
334 ? 'paragraph'
335 : 'html',
336 raw: cap[0],
337 pre: !this.options.sanitizer
338 && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
339 text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
340 };
341 }
342 }
343
344 def(src) {
345 const cap = this.rules.block.def.exec(src);
346 if (cap) {
347 if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
348 const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
349 return {
350 type: 'def',
351 tag,
352 raw: cap[0],
353 href: cap[2],
354 title: cap[3]
355 };
356 }
357 }
358
359 table(src) {
360 const cap = this.rules.block.table.exec(src);
361 if (cap) {
362 const item = {
363 type: 'table',
364 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
365 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
366 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
367 };
368
369 if (item.header.length === item.align.length) {
370 item.raw = cap[0];
371
372 let l = item.align.length;
373 let i;
374 for (i = 0; i < l; i++) {
375 if (/^ *-+: *$/.test(item.align[i])) {
376 item.align[i] = 'right';
377 } else if (/^ *:-+: *$/.test(item.align[i])) {
378 item.align[i] = 'center';
379 } else if (/^ *:-+ *$/.test(item.align[i])) {
380 item.align[i] = 'left';
381 } else {
382 item.align[i] = null;
383 }
384 }
385
386 l = item.cells.length;
387 for (i = 0; i < l; i++) {
388 item.cells[i] = splitCells(
389 item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
390 item.header.length);
391 }
392
393 return item;
394 }
395 }
396 }
397
398 lheading(src) {
399 const cap = this.rules.block.lheading.exec(src);
400 if (cap) {
401 return {
402 type: 'heading',
403 raw: cap[0],
404 depth: cap[2].charAt(0) === '=' ? 1 : 2,
405 text: cap[1]
406 };
407 }
408 }
409
410 paragraph(src) {
411 const cap = this.rules.block.paragraph.exec(src);
412 if (cap) {
413 return {
414 type: 'paragraph',
415 raw: cap[0],
416 text: cap[1].charAt(cap[1].length - 1) === '\n'
417 ? cap[1].slice(0, -1)
418 : cap[1]
419 };
420 }
421 }
422
423 text(src) {
424 const cap = this.rules.block.text.exec(src);
425 if (cap) {
426 return {
427 type: 'text',
428 raw: cap[0],
429 text: cap[0]
430 };
431 }
432 }
433
434 escape(src) {
435 const cap = this.rules.inline.escape.exec(src);
436 if (cap) {
437 return {
438 type: 'escape',
439 raw: cap[0],
440 text: escape(cap[1])
441 };
442 }
443 }
444
445 tag(src, inLink, inRawBlock) {
446 const cap = this.rules.inline.tag.exec(src);
447 if (cap) {
448 if (!inLink && /^<a /i.test(cap[0])) {
449 inLink = true;
450 } else if (inLink && /^<\/a>/i.test(cap[0])) {
451 inLink = false;
452 }
453 if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
454 inRawBlock = true;
455 } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
456 inRawBlock = false;
457 }
458
459 return {
460 type: this.options.sanitize
461 ? 'text'
462 : 'html',
463 raw: cap[0],
464 inLink,
465 inRawBlock,
466 text: this.options.sanitize
467 ? (this.options.sanitizer
468 ? this.options.sanitizer(cap[0])
469 : escape(cap[0]))
470 : cap[0]
471 };
472 }
473 }
474
475 link(src) {
476 const cap = this.rules.inline.link.exec(src);
477 if (cap) {
478 const trimmedUrl = cap[2].trim();
479 if (!this.options.pedantic && /^</.test(trimmedUrl)) {
480 // commonmark requires matching angle brackets
481 if (!(/>$/.test(trimmedUrl))) {
482 return;
483 }
484
485 // ending angle bracket cannot be escaped
486 const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
487 if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
488 return;
489 }
490 } else {
491 // find closing parenthesis
492 const lastParenIndex = findClosingBracket(cap[2], '()');
493 if (lastParenIndex > -1) {
494 const start = cap[0].indexOf('!') === 0 ? 5 : 4;
495 const linkLen = start + cap[1].length + lastParenIndex;
496 cap[2] = cap[2].substring(0, lastParenIndex);
497 cap[0] = cap[0].substring(0, linkLen).trim();
498 cap[3] = '';
499 }
500 }
501 let href = cap[2];
502 let title = '';
503 if (this.options.pedantic) {
504 // split pedantic href and title
505 const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
506
507 if (link) {
508 href = link[1];
509 title = link[3];
510 }
511 } else {
512 title = cap[3] ? cap[3].slice(1, -1) : '';
513 }
514
515 href = href.trim();
516 if (/^</.test(href)) {
517 if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
518 // pedantic allows starting angle bracket without ending angle bracket
519 href = href.slice(1);
520 } else {
521 href = href.slice(1, -1);
522 }
523 }
524 return outputLink(cap, {
525 href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
526 title: title ? title.replace(this.rules.inline._escapes, '$1') : title
527 }, cap[0]);
528 }
529 }
530
531 reflink(src, links) {
532 let cap;
533 if ((cap = this.rules.inline.reflink.exec(src))
534 || (cap = this.rules.inline.nolink.exec(src))) {
535 let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
536 link = links[link.toLowerCase()];
537 if (!link || !link.href) {
538 const text = cap[0].charAt(0);
539 return {
540 type: 'text',
541 raw: text,
542 text
543 };
544 }
545 return outputLink(cap, link, cap[0]);
546 }
547 }
548
549 emStrong(src, maskedSrc, prevChar = '') {
550 let match = this.rules.inline.emStrong.lDelim.exec(src);
551 if (!match) return;
552
553 // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
554 if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return;
555
556 const nextChar = match[1] || match[2] || '';
557
558 if (!nextChar || (nextChar && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
559 const lLength = match[0].length - 1;
560 let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
561
562 const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
563 endReg.lastIndex = 0;
564
565 // Clip maskedSrc to same section of string as src (move to lexer?)
566 maskedSrc = maskedSrc.slice(-1 * src.length + lLength);
567
568 while ((match = endReg.exec(maskedSrc)) != null) {
569 rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6];
570
571 if (!rDelim) continue; // skip single * in __abc*abc__
572
573 rLength = rDelim.length;
574
575 if (match[3] || match[4]) { // found another Left Delim
576 delimTotal += rLength;
577 continue;
578 } else if (match[5] || match[6]) { // either Left or Right Delim
579 if (lLength % 3 && !((lLength + rLength) % 3)) {
580 midDelimTotal += rLength;
581 continue; // CommonMark Emphasis Rules 9-10
582 }
583 }
584
585 delimTotal -= rLength;
586
587 if (delimTotal > 0) continue; // Haven't found enough closing delimiters
588
589 // Remove extra characters. *a*** -> *a*
590 rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
591
592 // Create `em` if smallest delimiter has odd char count. *a***
593 if (Math.min(lLength, rLength) % 2) {
594 return {
595 type: 'em',
596 raw: src.slice(0, lLength + match.index + rLength + 1),
597 text: src.slice(1, lLength + match.index + rLength)
598 };
599 }
600
601 // Create 'strong' if smallest delimiter has even char count. **a***
602 return {
603 type: 'strong',
604 raw: src.slice(0, lLength + match.index + rLength + 1),
605 text: src.slice(2, lLength + match.index + rLength - 1)
606 };
607 }
608 }
609 }
610
611 codespan(src) {
612 const cap = this.rules.inline.code.exec(src);
613 if (cap) {
614 let text = cap[2].replace(/\n/g, ' ');
615 const hasNonSpaceChars = /[^ ]/.test(text);
616 const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
617 if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
618 text = text.substring(1, text.length - 1);
619 }
620 text = escape(text, true);
621 return {
622 type: 'codespan',
623 raw: cap[0],
624 text
625 };
626 }
627 }
628
629 br(src) {
630 const cap = this.rules.inline.br.exec(src);
631 if (cap) {
632 return {
633 type: 'br',
634 raw: cap[0]
635 };
636 }
637 }
638
639 del(src) {
640 const cap = this.rules.inline.del.exec(src);
641 if (cap) {
642 return {
643 type: 'del',
644 raw: cap[0],
645 text: cap[2]
646 };
647 }
648 }
649
650 autolink(src, mangle) {
651 const cap = this.rules.inline.autolink.exec(src);
652 if (cap) {
653 let text, href;
654 if (cap[2] === '@') {
655 text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
656 href = 'mailto:' + text;
657 } else {
658 text = escape(cap[1]);
659 href = text;
660 }
661
662 return {
663 type: 'link',
664 raw: cap[0],
665 text,
666 href,
667 tokens: [
668 {
669 type: 'text',
670 raw: text,
671 text
672 }
673 ]
674 };
675 }
676 }
677
678 url(src, mangle) {
679 let cap;
680 if (cap = this.rules.inline.url.exec(src)) {
681 let text, href;
682 if (cap[2] === '@') {
683 text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
684 href = 'mailto:' + text;
685 } else {
686 // do extended autolink path validation
687 let prevCapZero;
688 do {
689 prevCapZero = cap[0];
690 cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
691 } while (prevCapZero !== cap[0]);
692 text = escape(cap[0]);
693 if (cap[1] === 'www.') {
694 href = 'http://' + text;
695 } else {
696 href = text;
697 }
698 }
699 return {
700 type: 'link',
701 raw: cap[0],
702 text,
703 href,
704 tokens: [
705 {
706 type: 'text',
707 raw: text,
708 text
709 }
710 ]
711 };
712 }
713 }
714
715 inlineText(src, inRawBlock, smartypants) {
716 const cap = this.rules.inline.text.exec(src);
717 if (cap) {
718 let text;
719 if (inRawBlock) {
720 text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
721 } else {
722 text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
723 }
724 return {
725 type: 'text',
726 raw: cap[0],
727 text
728 };
729 }
730 }
731};