// Retrieved from the UNPKG file viewer (17.9 kB, JavaScript).

// Project-local dependencies: default options plus string/table helpers.
const { defaults } = require('./defaults.js');
const {
  rtrim,
  splitCells,
  escape,
  findClosingBracket
} = require('./helpers.js');
8
/**
 * Build a `link` or `image` token from an inline match.
 *
 * @param {Array<string>} cap - Regex match; `cap[0]` is the full match (a
 *   leading `!` selects the image branch) and `cap[1]` is the bracketed label.
 * @param {{href: string, title: (string|null|undefined)}} link - Resolved
 *   destination. A falsy `title` becomes `null`; otherwise it is passed
 *   through `escape`.
 * @param {string} raw - Source text to record on the token.
 * @returns {{type: string, raw: string, href: string, title: (string|null), text: string}}
 *   The token. For images the label text is additionally passed through
 *   `escape`; for links it is returned as-is.
 */
function outputLink(cap, link, raw) {
  const href = link.href;
  const title = link.title ? escape(link.title) : null;
  // Drop the backslash in front of escaped square brackets in the label.
  const text = cap[1].replace(/\\([\[\]])/g, '$1');

  if (cap[0].charAt(0) !== '!') {
    return {
      type: 'link',
      raw,
      href,
      title,
      text
    };
  }

  return {
    type: 'image',
    raw,
    href,
    title,
    text: escape(text)
  };
}
32
/**
 * Remove from each line of a fenced code body the indentation that precedes
 * the opening fence.
 *
 * @param {string} raw - Raw fenced block; only leading whitespace directly
 *   followed by three backticks is considered.
 * @param {string} text - Code body to outdent.
 * @returns {string} `text` unchanged when `raw` has no whitespace before a
 *   backtick fence; otherwise `text` with up to that many leading characters
 *   removed from every line whose leading whitespace is at least as long as
 *   the fence indentation. Lines with less leading whitespace are kept as-is.
 */
function indentCodeCompensation(raw, text) {
  const matchIndentToCode = raw.match(/^(\s+)(?:```)/);

  if (matchIndentToCode === null) {
    return text;
  }

  const indentToCode = matchIndentToCode[1];

  return text
    .split('\n')
    .map(node => {
      const matchIndentInNode = node.match(/^\s+/);
      if (matchIndentInNode === null) {
        return node;
      }

      const [indentInNode] = matchIndentInNode;

      // Only outdent lines indented at least as far as the fence itself.
      if (indentInNode.length >= indentToCode.length) {
        return node.slice(indentToCode.length);
      }

      return node;
    })
    .join('\n');
}
60
61/**
62 * Tokenizer
63 */
64module.exports = class Tokenizer {
65 constructor(options) {
66 this.options = options || defaults;
67 }
68
69 space(src) {
70 const cap = this.rules.block.newline.exec(src);
71 if (cap) {
72 if (cap[0].length > 1) {
73 return {
74 type: 'space',
75 raw: cap[0]
76 };
77 }
78 return { raw: '\n' };
79 }
80 }
81
82 code(src, tokens) {
83 const cap = this.rules.block.code.exec(src);
84 if (cap) {
85 const lastToken = tokens[tokens.length - 1];
86 // An indented code block cannot interrupt a paragraph.
87 if (lastToken && lastToken.type === 'paragraph') {
88 return {
89 raw: cap[0],
90 text: cap[0].trimRight()
91 };
92 }
93
94 const text = cap[0].replace(/^ {1,4}/gm, '');
95 return {
96 type: 'code',
97 raw: cap[0],
98 codeBlockStyle: 'indented',
99 text: !this.options.pedantic
100 ? rtrim(text, '\n')
101 : text
102 };
103 }
104 }
105
106 fences(src) {
107 const cap = this.rules.block.fences.exec(src);
108 if (cap) {
109 const raw = cap[0];
110 const text = indentCodeCompensation(raw, cap[3] || '');
111
112 return {
113 type: 'code',
114 raw,
115 lang: cap[2] ? cap[2].trim() : cap[2],
116 text
117 };
118 }
119 }
120
121 heading(src) {
122 const cap = this.rules.block.heading.exec(src);
123 if (cap) {
124 let text = cap[2].trim();
125
126 // remove trailing #s
127 if (/#$/.test(text)) {
128 const trimmed = rtrim(text, '#');
129 if (this.options.pedantic) {
130 text = trimmed.trim();
131 } else if (!trimmed || / $/.test(trimmed)) {
132 // CommonMark requires space before trailing #s
133 text = trimmed.trim();
134 }
135 }
136
137 return {
138 type: 'heading',
139 raw: cap[0],
140 depth: cap[1].length,
141 text: text
142 };
143 }
144 }
145
146 nptable(src) {
147 const cap = this.rules.block.nptable.exec(src);
148 if (cap) {
149 const item = {
150 type: 'table',
151 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
152 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
153 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
154 raw: cap[0]
155 };
156
157 if (item.header.length === item.align.length) {
158 let l = item.align.length;
159 let i;
160 for (i = 0; i < l; i++) {
161 if (/^ *-+: *$/.test(item.align[i])) {
162 item.align[i] = 'right';
163 } else if (/^ *:-+: *$/.test(item.align[i])) {
164 item.align[i] = 'center';
165 } else if (/^ *:-+ *$/.test(item.align[i])) {
166 item.align[i] = 'left';
167 } else {
168 item.align[i] = null;
169 }
170 }
171
172 l = item.cells.length;
173 for (i = 0; i < l; i++) {
174 item.cells[i] = splitCells(item.cells[i], item.header.length);
175 }
176
177 return item;
178 }
179 }
180 }
181
182 hr(src) {
183 const cap = this.rules.block.hr.exec(src);
184 if (cap) {
185 return {
186 type: 'hr',
187 raw: cap[0]
188 };
189 }
190 }
191
192 blockquote(src) {
193 const cap = this.rules.block.blockquote.exec(src);
194 if (cap) {
195 const text = cap[0].replace(/^ *> ?/gm, '');
196
197 return {
198 type: 'blockquote',
199 raw: cap[0],
200 text
201 };
202 }
203 }
204
205 list(src) {
206 const cap = this.rules.block.list.exec(src);
207 if (cap) {
208 let raw = cap[0];
209 const bull = cap[2];
210 const isordered = bull.length > 1;
211
212 const list = {
213 type: 'list',
214 raw,
215 ordered: isordered,
216 start: isordered ? +bull.slice(0, -1) : '',
217 loose: false,
218 items: []
219 };
220
221 // Get each top-level item.
222 const itemMatch = cap[0].match(this.rules.block.item);
223
224 let next = false,
225 item,
226 space,
227 bcurr,
228 bnext,
229 addBack,
230 loose,
231 istask,
232 ischecked;
233
234 let l = itemMatch.length;
235 bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
236 for (let i = 0; i < l; i++) {
237 item = itemMatch[i];
238 raw = item;
239
240 // Determine whether the next list item belongs here.
241 // Backpedal if it does not belong in this list.
242 if (i !== l - 1) {
243 bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
244 if (
245 !this.options.pedantic
246 ? bnext[1].length > bcurr[0].length || bnext[1].length > 3
247 : bnext[1].length > bcurr[1].length
248 ) {
249 // nested list
250 itemMatch.splice(i, 2, itemMatch[i] + '\n' + itemMatch[i + 1]);
251 i--;
252 l--;
253 continue;
254 } else {
255 if (
256 // different bullet style
257 !this.options.pedantic || this.options.smartLists
258 ? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
259 : isordered === (bnext[2].length === 1)
260 ) {
261 addBack = itemMatch.slice(i + 1).join('\n');
262 list.raw = list.raw.substring(0, list.raw.length - addBack.length);
263 i = l - 1;
264 }
265 }
266 bcurr = bnext;
267 }
268
269 // Remove the list item's bullet
270 // so it is seen as the next token.
271 space = item.length;
272 item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');
273
274 // Outdent whatever the
275 // list item contains. Hacky.
276 if (~item.indexOf('\n ')) {
277 space -= item.length;
278 item = !this.options.pedantic
279 ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
280 : item.replace(/^ {1,4}/gm, '');
281 }
282
283 // Determine whether item is loose or not.
284 // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
285 // for discount behavior.
286 loose = next || /\n\n(?!\s*$)/.test(item);
287 if (i !== l - 1) {
288 next = item.charAt(item.length - 1) === '\n';
289 if (!loose) loose = next;
290 }
291
292 if (loose) {
293 list.loose = true;
294 }
295
296 // Check for task list items
297 if (this.options.gfm) {
298 istask = /^\[[ xX]\] /.test(item);
299 ischecked = undefined;
300 if (istask) {
301 ischecked = item[1] !== ' ';
302 item = item.replace(/^\[[ xX]\] +/, '');
303 }
304 }
305
306 list.items.push({
307 type: 'list_item',
308 raw,
309 task: istask,
310 checked: ischecked,
311 loose: loose,
312 text: item
313 });
314 }
315
316 return list;
317 }
318 }
319
320 html(src) {
321 const cap = this.rules.block.html.exec(src);
322 if (cap) {
323 return {
324 type: this.options.sanitize
325 ? 'paragraph'
326 : 'html',
327 raw: cap[0],
328 pre: !this.options.sanitizer
329 && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
330 text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
331 };
332 }
333 }
334
335 def(src) {
336 const cap = this.rules.block.def.exec(src);
337 if (cap) {
338 if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
339 const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
340 return {
341 tag,
342 raw: cap[0],
343 href: cap[2],
344 title: cap[3]
345 };
346 }
347 }
348
349 table(src) {
350 const cap = this.rules.block.table.exec(src);
351 if (cap) {
352 const item = {
353 type: 'table',
354 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
355 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
356 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
357 };
358
359 if (item.header.length === item.align.length) {
360 item.raw = cap[0];
361
362 let l = item.align.length;
363 let i;
364 for (i = 0; i < l; i++) {
365 if (/^ *-+: *$/.test(item.align[i])) {
366 item.align[i] = 'right';
367 } else if (/^ *:-+: *$/.test(item.align[i])) {
368 item.align[i] = 'center';
369 } else if (/^ *:-+ *$/.test(item.align[i])) {
370 item.align[i] = 'left';
371 } else {
372 item.align[i] = null;
373 }
374 }
375
376 l = item.cells.length;
377 for (i = 0; i < l; i++) {
378 item.cells[i] = splitCells(
379 item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
380 item.header.length);
381 }
382
383 return item;
384 }
385 }
386 }
387
388 lheading(src) {
389 const cap = this.rules.block.lheading.exec(src);
390 if (cap) {
391 return {
392 type: 'heading',
393 raw: cap[0],
394 depth: cap[2].charAt(0) === '=' ? 1 : 2,
395 text: cap[1]
396 };
397 }
398 }
399
400 paragraph(src) {
401 const cap = this.rules.block.paragraph.exec(src);
402 if (cap) {
403 return {
404 type: 'paragraph',
405 raw: cap[0],
406 text: cap[1].charAt(cap[1].length - 1) === '\n'
407 ? cap[1].slice(0, -1)
408 : cap[1]
409 };
410 }
411 }
412
413 text(src, tokens) {
414 const cap = this.rules.block.text.exec(src);
415 if (cap) {
416 const lastToken = tokens[tokens.length - 1];
417 if (lastToken && lastToken.type === 'text') {
418 return {
419 raw: cap[0],
420 text: cap[0]
421 };
422 }
423
424 return {
425 type: 'text',
426 raw: cap[0],
427 text: cap[0]
428 };
429 }
430 }
431
432 escape(src) {
433 const cap = this.rules.inline.escape.exec(src);
434 if (cap) {
435 return {
436 type: 'escape',
437 raw: cap[0],
438 text: escape(cap[1])
439 };
440 }
441 }
442
443 tag(src, inLink, inRawBlock) {
444 const cap = this.rules.inline.tag.exec(src);
445 if (cap) {
446 if (!inLink && /^<a /i.test(cap[0])) {
447 inLink = true;
448 } else if (inLink && /^<\/a>/i.test(cap[0])) {
449 inLink = false;
450 }
451 if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
452 inRawBlock = true;
453 } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
454 inRawBlock = false;
455 }
456
457 return {
458 type: this.options.sanitize
459 ? 'text'
460 : 'html',
461 raw: cap[0],
462 inLink,
463 inRawBlock,
464 text: this.options.sanitize
465 ? (this.options.sanitizer
466 ? this.options.sanitizer(cap[0])
467 : escape(cap[0]))
468 : cap[0]
469 };
470 }
471 }
472
473 link(src) {
474 const cap = this.rules.inline.link.exec(src);
475 if (cap) {
476 const trimmedUrl = cap[2].trim();
477 if (!this.options.pedantic && /^</.test(trimmedUrl)) {
478 // commonmark requires matching angle brackets
479 if (!(/>$/.test(trimmedUrl))) {
480 return;
481 }
482
483 // ending angle bracket cannot be escaped
484 const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
485 if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
486 return;
487 }
488 } else {
489 // find closing parenthesis
490 const lastParenIndex = findClosingBracket(cap[2], '()');
491 if (lastParenIndex > -1) {
492 const start = cap[0].indexOf('!') === 0 ? 5 : 4;
493 const linkLen = start + cap[1].length + lastParenIndex;
494 cap[2] = cap[2].substring(0, lastParenIndex);
495 cap[0] = cap[0].substring(0, linkLen).trim();
496 cap[3] = '';
497 }
498 }
499 let href = cap[2];
500 let title = '';
501 if (this.options.pedantic) {
502 // split pedantic href and title
503 const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
504
505 if (link) {
506 href = link[1];
507 title = link[3];
508 }
509 } else {
510 title = cap[3] ? cap[3].slice(1, -1) : '';
511 }
512
513 href = href.trim();
514 if (/^</.test(href)) {
515 if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
516 // pedantic allows starting angle bracket without ending angle bracket
517 href = href.slice(1);
518 } else {
519 href = href.slice(1, -1);
520 }
521 }
522 return outputLink(cap, {
523 href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
524 title: title ? title.replace(this.rules.inline._escapes, '$1') : title
525 }, cap[0]);
526 }
527 }
528
529 reflink(src, links) {
530 let cap;
531 if ((cap = this.rules.inline.reflink.exec(src))
532 || (cap = this.rules.inline.nolink.exec(src))) {
533 let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
534 link = links[link.toLowerCase()];
535 if (!link || !link.href) {
536 const text = cap[0].charAt(0);
537 return {
538 type: 'text',
539 raw: text,
540 text
541 };
542 }
543 return outputLink(cap, link, cap[0]);
544 }
545 }
546
547 strong(src, maskedSrc, prevChar = '') {
548 let match = this.rules.inline.strong.start.exec(src);
549
550 if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
551 maskedSrc = maskedSrc.slice(-1 * src.length);
552 const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;
553
554 endReg.lastIndex = 0;
555
556 let cap;
557 while ((match = endReg.exec(maskedSrc)) != null) {
558 cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
559 if (cap) {
560 return {
561 type: 'strong',
562 raw: src.slice(0, cap[0].length),
563 text: src.slice(2, cap[0].length - 2)
564 };
565 }
566 }
567 }
568 }
569
570 em(src, maskedSrc, prevChar = '') {
571 let match = this.rules.inline.em.start.exec(src);
572
573 if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
574 maskedSrc = maskedSrc.slice(-1 * src.length);
575 const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;
576
577 endReg.lastIndex = 0;
578
579 let cap;
580 while ((match = endReg.exec(maskedSrc)) != null) {
581 cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
582 if (cap) {
583 return {
584 type: 'em',
585 raw: src.slice(0, cap[0].length),
586 text: src.slice(1, cap[0].length - 1)
587 };
588 }
589 }
590 }
591 }
592
593 codespan(src) {
594 const cap = this.rules.inline.code.exec(src);
595 if (cap) {
596 let text = cap[2].replace(/\n/g, ' ');
597 const hasNonSpaceChars = /[^ ]/.test(text);
598 const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
599 if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
600 text = text.substring(1, text.length - 1);
601 }
602 text = escape(text, true);
603 return {
604 type: 'codespan',
605 raw: cap[0],
606 text
607 };
608 }
609 }
610
611 br(src) {
612 const cap = this.rules.inline.br.exec(src);
613 if (cap) {
614 return {
615 type: 'br',
616 raw: cap[0]
617 };
618 }
619 }
620
621 del(src) {
622 const cap = this.rules.inline.del.exec(src);
623 if (cap) {
624 return {
625 type: 'del',
626 raw: cap[0],
627 text: cap[2]
628 };
629 }
630 }
631
632 autolink(src, mangle) {
633 const cap = this.rules.inline.autolink.exec(src);
634 if (cap) {
635 let text, href;
636 if (cap[2] === '@') {
637 text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
638 href = 'mailto:' + text;
639 } else {
640 text = escape(cap[1]);
641 href = text;
642 }
643
644 return {
645 type: 'link',
646 raw: cap[0],
647 text,
648 href,
649 tokens: [
650 {
651 type: 'text',
652 raw: text,
653 text
654 }
655 ]
656 };
657 }
658 }
659
660 url(src, mangle) {
661 let cap;
662 if (cap = this.rules.inline.url.exec(src)) {
663 let text, href;
664 if (cap[2] === '@') {
665 text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
666 href = 'mailto:' + text;
667 } else {
668 // do extended autolink path validation
669 let prevCapZero;
670 do {
671 prevCapZero = cap[0];
672 cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
673 } while (prevCapZero !== cap[0]);
674 text = escape(cap[0]);
675 if (cap[1] === 'www.') {
676 href = 'http://' + text;
677 } else {
678 href = text;
679 }
680 }
681 return {
682 type: 'link',
683 raw: cap[0],
684 text,
685 href,
686 tokens: [
687 {
688 type: 'text',
689 raw: text,
690 text
691 }
692 ]
693 };
694 }
695 }
696
697 inlineText(src, inRawBlock, smartypants) {
698 const cap = this.rules.inline.text.exec(src);
699 if (cap) {
700 let text;
701 if (inRawBlock) {
702 text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
703 } else {
704 text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
705 }
706 return {
707 type: 'text',
708 raw: cap[0],
709 text
710 };
711 }
712 }
713};