// Source retrieved from UNPKG (17.7 kB, JavaScript).
1const { defaults } = require('./defaults.js');
2const {
3 rtrim,
4 splitCells,
5 escape,
6 findClosingBracket
7} = require('./helpers.js');
8
/**
 * Build a `link` or `image` token from an inline link / reference match.
 *
 * @param {Array<string>} cap - Regex match. `cap[0]` is the full match (a
 *   leading `!` selects the image form) and `cap[1]` is the bracketed label.
 * @param {{href: string, title: (string|null|undefined)}} link - Destination
 *   and optional title. A falsy title becomes `null`; otherwise it is passed
 *   through the `escape` helper.
 * @param {string} raw - Source text consumed by the token.
 * @returns {{type: string, raw: string, href: string, title: (string|null), text: string}}
 *   A `link` token, or an `image` token whose `text` has been passed through
 *   `escape`.
 */
function outputLink(cap, link, raw) {
  const href = link.href;
  const title = link.title ? escape(link.title) : null;
  // Drop the backslash in front of escaped square brackets in the label.
  const text = cap[1].replace(/\\([\[\]])/g, '$1');

  if (cap[0].charAt(0) !== '!') {
    return {
      type: 'link',
      raw,
      href,
      title,
      text
    };
  }

  return {
    type: 'image',
    raw,
    href,
    title,
    text: escape(text)
  };
}
32
/**
 * Remove from each line of a fenced code block the indentation that the
 * opening fence itself carries.
 *
 * If `raw` does not start with whitespace followed by three backticks, `text`
 * is returned unchanged. Otherwise every line of `text` whose leading
 * whitespace is at least as long as the fence's has that many characters
 * sliced off the front; lines with less (or no) leading whitespace are kept
 * as they are.
 *
 * @param {string} raw - Full source of the fenced block, fence included.
 * @param {string} text - Code content of the block.
 * @returns {string} `text` with the fence indentation compensated.
 */
function indentCodeCompensation(raw, text) {
  const matchIndentToCode = raw.match(/^(\s+)(?:```)/);

  if (matchIndentToCode === null) {
    return text;
  }

  const indentToCode = matchIndentToCode[1];

  return text
    .split('\n')
    .map(node => {
      const matchIndentInNode = node.match(/^\s+/);
      if (matchIndentInNode === null) {
        return node;
      }

      const [indentInNode] = matchIndentInNode;

      // Only strip when the line is indented at least as far as the fence.
      if (indentInNode.length >= indentToCode.length) {
        return node.slice(indentToCode.length);
      }

      return node;
    })
    .join('\n');
}
60
61/**
62 * Tokenizer
63 */
64module.exports = class Tokenizer {
65 constructor(options) {
66 this.options = options || defaults;
67 }
68
69 space(src) {
70 const cap = this.rules.block.newline.exec(src);
71 if (cap) {
72 if (cap[0].length > 1) {
73 return {
74 type: 'space',
75 raw: cap[0]
76 };
77 }
78 return { raw: '\n' };
79 }
80 }
81
82 code(src, tokens) {
83 const cap = this.rules.block.code.exec(src);
84 if (cap) {
85 const lastToken = tokens[tokens.length - 1];
86 // An indented code block cannot interrupt a paragraph.
87 if (lastToken && lastToken.type === 'paragraph') {
88 return {
89 raw: cap[0],
90 text: cap[0].trimRight()
91 };
92 }
93
94 const text = cap[0].replace(/^ {4}/gm, '');
95 return {
96 type: 'code',
97 raw: cap[0],
98 codeBlockStyle: 'indented',
99 text: !this.options.pedantic
100 ? rtrim(text, '\n')
101 : text
102 };
103 }
104 }
105
106 fences(src) {
107 const cap = this.rules.block.fences.exec(src);
108 if (cap) {
109 const raw = cap[0];
110 const text = indentCodeCompensation(raw, cap[3] || '');
111
112 return {
113 type: 'code',
114 raw,
115 lang: cap[2] ? cap[2].trim() : cap[2],
116 text
117 };
118 }
119 }
120
121 heading(src) {
122 const cap = this.rules.block.heading.exec(src);
123 if (cap) {
124 let text = cap[2].trim();
125
126 // remove trailing #s
127 if (/#$/.test(text)) {
128 const trimmed = rtrim(text, '#');
129 if (this.options.pedantic) {
130 text = trimmed.trim();
131 } else if (!trimmed || / $/.test(trimmed)) {
132 // CommonMark requires space before trailing #s
133 text = trimmed.trim();
134 }
135 }
136
137 return {
138 type: 'heading',
139 raw: cap[0],
140 depth: cap[1].length,
141 text: text
142 };
143 }
144 }
145
146 nptable(src) {
147 const cap = this.rules.block.nptable.exec(src);
148 if (cap) {
149 const item = {
150 type: 'table',
151 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
152 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
153 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
154 raw: cap[0]
155 };
156
157 if (item.header.length === item.align.length) {
158 let l = item.align.length;
159 let i;
160 for (i = 0; i < l; i++) {
161 if (/^ *-+: *$/.test(item.align[i])) {
162 item.align[i] = 'right';
163 } else if (/^ *:-+: *$/.test(item.align[i])) {
164 item.align[i] = 'center';
165 } else if (/^ *:-+ *$/.test(item.align[i])) {
166 item.align[i] = 'left';
167 } else {
168 item.align[i] = null;
169 }
170 }
171
172 l = item.cells.length;
173 for (i = 0; i < l; i++) {
174 item.cells[i] = splitCells(item.cells[i], item.header.length);
175 }
176
177 return item;
178 }
179 }
180 }
181
182 hr(src) {
183 const cap = this.rules.block.hr.exec(src);
184 if (cap) {
185 return {
186 type: 'hr',
187 raw: cap[0]
188 };
189 }
190 }
191
192 blockquote(src) {
193 const cap = this.rules.block.blockquote.exec(src);
194 if (cap) {
195 const text = cap[0].replace(/^ *> ?/gm, '');
196
197 return {
198 type: 'blockquote',
199 raw: cap[0],
200 text
201 };
202 }
203 }
204
205 list(src) {
206 const cap = this.rules.block.list.exec(src);
207 if (cap) {
208 let raw = cap[0];
209 const bull = cap[2];
210 const isordered = bull.length > 1;
211
212 const list = {
213 type: 'list',
214 raw,
215 ordered: isordered,
216 start: isordered ? +bull.slice(0, -1) : '',
217 loose: false,
218 items: []
219 };
220
221 // Get each top-level item.
222 const itemMatch = cap[0].match(this.rules.block.item);
223
224 let next = false,
225 item,
226 space,
227 bcurr,
228 bnext,
229 addBack,
230 loose,
231 istask,
232 ischecked;
233
234 let l = itemMatch.length;
235 bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
236 for (let i = 0; i < l; i++) {
237 item = itemMatch[i];
238 raw = item;
239
240 // Determine whether the next list item belongs here.
241 // Backpedal if it does not belong in this list.
242 if (i !== l - 1) {
243 bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
244
245 if (bnext[1].length > bcurr[0].length || bnext[1].length > 3) {
246 // nested list
247 itemMatch.splice(i, 2, itemMatch[i] + '\n' + itemMatch[i + 1]);
248 i--;
249 l--;
250 continue;
251 } else {
252 if (
253 // different bullet style
254 !this.options.pedantic || this.options.smartLists
255 ? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
256 : isordered === (bnext[2].length === 1)
257 ) {
258 addBack = itemMatch.slice(i + 1).join('\n');
259 list.raw = list.raw.substring(0, list.raw.length - addBack.length);
260 i = l - 1;
261 }
262 }
263 bcurr = bnext;
264 }
265
266 // Remove the list item's bullet
267 // so it is seen as the next token.
268 space = item.length;
269 item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');
270
271 // Outdent whatever the
272 // list item contains. Hacky.
273 if (~item.indexOf('\n ')) {
274 space -= item.length;
275 item = !this.options.pedantic
276 ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
277 : item.replace(/^ {1,4}/gm, '');
278 }
279
280 // Determine whether item is loose or not.
281 // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
282 // for discount behavior.
283 loose = next || /\n\n(?!\s*$)/.test(item);
284 if (i !== l - 1) {
285 next = item.charAt(item.length - 1) === '\n';
286 if (!loose) loose = next;
287 }
288
289 if (loose) {
290 list.loose = true;
291 }
292
293 // Check for task list items
294 if (this.options.gfm) {
295 istask = /^\[[ xX]\] /.test(item);
296 ischecked = undefined;
297 if (istask) {
298 ischecked = item[1] !== ' ';
299 item = item.replace(/^\[[ xX]\] +/, '');
300 }
301 }
302
303 list.items.push({
304 type: 'list_item',
305 raw,
306 task: istask,
307 checked: ischecked,
308 loose: loose,
309 text: item
310 });
311 }
312
313 return list;
314 }
315 }
316
317 html(src) {
318 const cap = this.rules.block.html.exec(src);
319 if (cap) {
320 return {
321 type: this.options.sanitize
322 ? 'paragraph'
323 : 'html',
324 raw: cap[0],
325 pre: !this.options.sanitizer
326 && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
327 text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
328 };
329 }
330 }
331
332 def(src) {
333 const cap = this.rules.block.def.exec(src);
334 if (cap) {
335 if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
336 const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
337 return {
338 tag,
339 raw: cap[0],
340 href: cap[2],
341 title: cap[3]
342 };
343 }
344 }
345
346 table(src) {
347 const cap = this.rules.block.table.exec(src);
348 if (cap) {
349 const item = {
350 type: 'table',
351 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
352 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
353 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
354 };
355
356 if (item.header.length === item.align.length) {
357 item.raw = cap[0];
358
359 let l = item.align.length;
360 let i;
361 for (i = 0; i < l; i++) {
362 if (/^ *-+: *$/.test(item.align[i])) {
363 item.align[i] = 'right';
364 } else if (/^ *:-+: *$/.test(item.align[i])) {
365 item.align[i] = 'center';
366 } else if (/^ *:-+ *$/.test(item.align[i])) {
367 item.align[i] = 'left';
368 } else {
369 item.align[i] = null;
370 }
371 }
372
373 l = item.cells.length;
374 for (i = 0; i < l; i++) {
375 item.cells[i] = splitCells(
376 item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
377 item.header.length);
378 }
379
380 return item;
381 }
382 }
383 }
384
385 lheading(src) {
386 const cap = this.rules.block.lheading.exec(src);
387 if (cap) {
388 return {
389 type: 'heading',
390 raw: cap[0],
391 depth: cap[2].charAt(0) === '=' ? 1 : 2,
392 text: cap[1]
393 };
394 }
395 }
396
397 paragraph(src) {
398 const cap = this.rules.block.paragraph.exec(src);
399 if (cap) {
400 return {
401 type: 'paragraph',
402 raw: cap[0],
403 text: cap[1].charAt(cap[1].length - 1) === '\n'
404 ? cap[1].slice(0, -1)
405 : cap[1]
406 };
407 }
408 }
409
410 text(src, tokens) {
411 const cap = this.rules.block.text.exec(src);
412 if (cap) {
413 const lastToken = tokens[tokens.length - 1];
414 if (lastToken && lastToken.type === 'text') {
415 return {
416 raw: cap[0],
417 text: cap[0]
418 };
419 }
420
421 return {
422 type: 'text',
423 raw: cap[0],
424 text: cap[0]
425 };
426 }
427 }
428
429 escape(src) {
430 const cap = this.rules.inline.escape.exec(src);
431 if (cap) {
432 return {
433 type: 'escape',
434 raw: cap[0],
435 text: escape(cap[1])
436 };
437 }
438 }
439
440 tag(src, inLink, inRawBlock) {
441 const cap = this.rules.inline.tag.exec(src);
442 if (cap) {
443 if (!inLink && /^<a /i.test(cap[0])) {
444 inLink = true;
445 } else if (inLink && /^<\/a>/i.test(cap[0])) {
446 inLink = false;
447 }
448 if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
449 inRawBlock = true;
450 } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
451 inRawBlock = false;
452 }
453
454 return {
455 type: this.options.sanitize
456 ? 'text'
457 : 'html',
458 raw: cap[0],
459 inLink,
460 inRawBlock,
461 text: this.options.sanitize
462 ? (this.options.sanitizer
463 ? this.options.sanitizer(cap[0])
464 : escape(cap[0]))
465 : cap[0]
466 };
467 }
468 }
469
470 link(src) {
471 const cap = this.rules.inline.link.exec(src);
472 if (cap) {
473 const trimmedUrl = cap[2].trim();
474 if (!this.options.pedantic && /^</.test(trimmedUrl)) {
475 // commonmark requires matching angle brackets
476 if (!(/>$/.test(trimmedUrl))) {
477 return;
478 }
479
480 // ending angle bracket cannot be escaped
481 const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
482 if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
483 return;
484 }
485 } else {
486 // find closing parenthesis
487 const lastParenIndex = findClosingBracket(cap[2], '()');
488 if (lastParenIndex > -1) {
489 const start = cap[0].indexOf('!') === 0 ? 5 : 4;
490 const linkLen = start + cap[1].length + lastParenIndex;
491 cap[2] = cap[2].substring(0, lastParenIndex);
492 cap[0] = cap[0].substring(0, linkLen).trim();
493 cap[3] = '';
494 }
495 }
496 let href = cap[2];
497 let title = '';
498 if (this.options.pedantic) {
499 // split pedantic href and title
500 const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
501
502 if (link) {
503 href = link[1];
504 title = link[3];
505 }
506 } else {
507 title = cap[3] ? cap[3].slice(1, -1) : '';
508 }
509
510 href = href.trim();
511 if (/^</.test(href)) {
512 if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
513 // pedantic allows starting angle bracket without ending angle bracket
514 href = href.slice(1);
515 } else {
516 href = href.slice(1, -1);
517 }
518 }
519 return outputLink(cap, {
520 href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
521 title: title ? title.replace(this.rules.inline._escapes, '$1') : title
522 }, cap[0]);
523 }
524 }
525
526 reflink(src, links) {
527 let cap;
528 if ((cap = this.rules.inline.reflink.exec(src))
529 || (cap = this.rules.inline.nolink.exec(src))) {
530 let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
531 link = links[link.toLowerCase()];
532 if (!link || !link.href) {
533 const text = cap[0].charAt(0);
534 return {
535 type: 'text',
536 raw: text,
537 text
538 };
539 }
540 return outputLink(cap, link, cap[0]);
541 }
542 }
543
544 strong(src, maskedSrc, prevChar = '') {
545 let match = this.rules.inline.strong.start.exec(src);
546
547 if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
548 maskedSrc = maskedSrc.slice(-1 * src.length);
549 const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;
550
551 endReg.lastIndex = 0;
552
553 let cap;
554 while ((match = endReg.exec(maskedSrc)) != null) {
555 cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
556 if (cap) {
557 return {
558 type: 'strong',
559 raw: src.slice(0, cap[0].length),
560 text: src.slice(2, cap[0].length - 2)
561 };
562 }
563 }
564 }
565 }
566
567 em(src, maskedSrc, prevChar = '') {
568 let match = this.rules.inline.em.start.exec(src);
569
570 if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
571 maskedSrc = maskedSrc.slice(-1 * src.length);
572 const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;
573
574 endReg.lastIndex = 0;
575
576 let cap;
577 while ((match = endReg.exec(maskedSrc)) != null) {
578 cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
579 if (cap) {
580 return {
581 type: 'em',
582 raw: src.slice(0, cap[0].length),
583 text: src.slice(1, cap[0].length - 1)
584 };
585 }
586 }
587 }
588 }
589
590 codespan(src) {
591 const cap = this.rules.inline.code.exec(src);
592 if (cap) {
593 let text = cap[2].replace(/\n/g, ' ');
594 const hasNonSpaceChars = /[^ ]/.test(text);
595 const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
596 if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
597 text = text.substring(1, text.length - 1);
598 }
599 text = escape(text, true);
600 return {
601 type: 'codespan',
602 raw: cap[0],
603 text
604 };
605 }
606 }
607
608 br(src) {
609 const cap = this.rules.inline.br.exec(src);
610 if (cap) {
611 return {
612 type: 'br',
613 raw: cap[0]
614 };
615 }
616 }
617
618 del(src) {
619 const cap = this.rules.inline.del.exec(src);
620 if (cap) {
621 return {
622 type: 'del',
623 raw: cap[0],
624 text: cap[2]
625 };
626 }
627 }
628
629 autolink(src, mangle) {
630 const cap = this.rules.inline.autolink.exec(src);
631 if (cap) {
632 let text, href;
633 if (cap[2] === '@') {
634 text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
635 href = 'mailto:' + text;
636 } else {
637 text = escape(cap[1]);
638 href = text;
639 }
640
641 return {
642 type: 'link',
643 raw: cap[0],
644 text,
645 href,
646 tokens: [
647 {
648 type: 'text',
649 raw: text,
650 text
651 }
652 ]
653 };
654 }
655 }
656
657 url(src, mangle) {
658 let cap;
659 if (cap = this.rules.inline.url.exec(src)) {
660 let text, href;
661 if (cap[2] === '@') {
662 text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
663 href = 'mailto:' + text;
664 } else {
665 // do extended autolink path validation
666 let prevCapZero;
667 do {
668 prevCapZero = cap[0];
669 cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
670 } while (prevCapZero !== cap[0]);
671 text = escape(cap[0]);
672 if (cap[1] === 'www.') {
673 href = 'http://' + text;
674 } else {
675 href = text;
676 }
677 }
678 return {
679 type: 'link',
680 raw: cap[0],
681 text,
682 href,
683 tokens: [
684 {
685 type: 'text',
686 raw: text,
687 text
688 }
689 ]
690 };
691 }
692 }
693
694 inlineText(src, inRawBlock, smartypants) {
695 const cap = this.rules.inline.text.exec(src);
696 if (cap) {
697 let text;
698 if (inRawBlock) {
699 text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
700 } else {
701 text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
702 }
703 return {
704 type: 'text',
705 raw: cap[0],
706 text
707 };
708 }
709 }
710};