UNPKG

19 kBJavaScriptView Raw
1const { defaults } = require('./defaults.js');
2const {
3 rtrim,
4 splitCells,
5 escape,
6 findClosingBracket
7} = require('./helpers.js');
8
/**
 * Build a link or image token from a regex capture and its link data.
 *
 * @param {Array<string>} cap - Capture array; cap[0] is the full match (a
 *   leading '!' selects the image branch) and cap[1] is the bracketed label.
 * @param {Object} link - Object providing `href` and an optional `title`.
 * @param {string} raw - Source text to record on the token.
 * @returns {Object} A token whose `type` is 'link' or 'image'.
 */
function outputLink(cap, link, raw) {
  const href = link.href;
  // A falsy title is normalised to null; otherwise it is escaped.
  const title = link.title ? escape(link.title) : null;
  // Drop the backslash in front of escaped square brackets in the label.
  const label = cap[1].replace(/\\([\[\]])/g, '$1');
  const isImage = cap[0].charAt(0) === '!';

  return {
    type: isImage ? 'image' : 'link',
    raw,
    href,
    title,
    // Only image text is escaped here; link text is passed through as-is.
    text: isImage ? escape(label) : label
  };
}
32
/**
 * Remove the indentation of an indented opening code fence from each line of
 * the fenced text.
 *
 * @param {string} raw - Raw fenced block, starting at the (possibly indented)
 *   opening fence.
 * @param {string} text - Text found between the fences.
 * @returns {string} `text` unchanged when `raw` does not start with
 *   whitespace followed by three backticks; otherwise `text` with up to that
 *   many leading characters removed from every sufficiently indented line.
 */
function indentCodeCompensation(raw, text) {
  const fenceIndent = raw.match(/^(\s+)(?:```)/);
  if (fenceIndent === null) {
    return text;
  }

  const width = fenceIndent[1].length;
  const lines = text.split('\n');

  for (let k = 0; k < lines.length; k++) {
    const lead = lines[k].match(/^\s+/);
    // Lines indented less than the fence are left untouched.
    if (lead !== null && lead[0].length >= width) {
      lines[k] = lines[k].slice(width);
    }
  }

  return lines.join('\n');
}
60
61/**
62 * Tokenizer
63 */
64module.exports = class Tokenizer {
65 constructor(options) {
66 this.options = options || defaults;
67 }
68
69 space(src) {
70 const cap = this.rules.block.newline.exec(src);
71 if (cap) {
72 if (cap[0].length > 1) {
73 return {
74 type: 'space',
75 raw: cap[0]
76 };
77 }
78 return { raw: '\n' };
79 }
80 }
81
82 code(src) {
83 const cap = this.rules.block.code.exec(src);
84 if (cap) {
85 const text = cap[0].replace(/^ {1,4}/gm, '');
86 return {
87 type: 'code',
88 raw: cap[0],
89 codeBlockStyle: 'indented',
90 text: !this.options.pedantic
91 ? rtrim(text, '\n')
92 : text
93 };
94 }
95 }
96
97 fences(src) {
98 const cap = this.rules.block.fences.exec(src);
99 if (cap) {
100 const raw = cap[0];
101 const text = indentCodeCompensation(raw, cap[3] || '');
102
103 return {
104 type: 'code',
105 raw,
106 lang: cap[2] ? cap[2].trim() : cap[2],
107 text
108 };
109 }
110 }
111
112 heading(src) {
113 const cap = this.rules.block.heading.exec(src);
114 if (cap) {
115 let text = cap[2].trim();
116
117 // remove trailing #s
118 if (/#$/.test(text)) {
119 const trimmed = rtrim(text, '#');
120 if (this.options.pedantic) {
121 text = trimmed.trim();
122 } else if (!trimmed || / $/.test(trimmed)) {
123 // CommonMark requires space before trailing #s
124 text = trimmed.trim();
125 }
126 }
127
128 return {
129 type: 'heading',
130 raw: cap[0],
131 depth: cap[1].length,
132 text: text
133 };
134 }
135 }
136
137 nptable(src) {
138 const cap = this.rules.block.nptable.exec(src);
139 if (cap) {
140 const item = {
141 type: 'table',
142 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
143 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
144 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
145 raw: cap[0]
146 };
147
148 if (item.header.length === item.align.length) {
149 let l = item.align.length;
150 let i;
151 for (i = 0; i < l; i++) {
152 if (/^ *-+: *$/.test(item.align[i])) {
153 item.align[i] = 'right';
154 } else if (/^ *:-+: *$/.test(item.align[i])) {
155 item.align[i] = 'center';
156 } else if (/^ *:-+ *$/.test(item.align[i])) {
157 item.align[i] = 'left';
158 } else {
159 item.align[i] = null;
160 }
161 }
162
163 l = item.cells.length;
164 for (i = 0; i < l; i++) {
165 item.cells[i] = splitCells(item.cells[i], item.header.length);
166 }
167
168 return item;
169 }
170 }
171 }
172
173 hr(src) {
174 const cap = this.rules.block.hr.exec(src);
175 if (cap) {
176 return {
177 type: 'hr',
178 raw: cap[0]
179 };
180 }
181 }
182
183 blockquote(src) {
184 const cap = this.rules.block.blockquote.exec(src);
185 if (cap) {
186 const text = cap[0].replace(/^ *> ?/gm, '');
187
188 return {
189 type: 'blockquote',
190 raw: cap[0],
191 text
192 };
193 }
194 }
195
/**
 * Tokenize a list and split it into its top-level items.
 *
 * @param {string} src - Remaining source text.
 * @returns {Object|undefined} A 'list' token with `ordered`, `start`,
 *   `loose` and an `items` array of 'list_item' tokens, or undefined when
 *   the block `list` rule does not match.
 */
list(src) {
  const cap = this.rules.block.list.exec(src);
  if (cap) {
    let raw = cap[0];
    const bull = cap[2];
    // A bullet longer than one character is treated as an ordered marker.
    const isordered = bull.length > 1;

    const list = {
      type: 'list',
      raw,
      ordered: isordered,
      // Numeric value of the marker without its last character; '' when unordered.
      start: isordered ? +bull.slice(0, -1) : '',
      loose: false,
      items: []
    };

    // Get each top-level item.
    const itemMatch = cap[0].match(this.rules.block.item);

    let next = false,
      item,
      space,
      bcurr,
      bnext,
      addBack,
      loose,
      istask,
      ischecked,
      endMatch;

    // `l` is the working item count; it shrinks when items are merged or
    // when the end of the list is detected inside an item.
    let l = itemMatch.length;
    // NOTE(review): capture 1 of listItemStart looks like the indentation and
    // capture 2 the bullet - confirm against the rule definition.
    bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
    for (let i = 0; i < l; i++) {
      item = itemMatch[i];
      raw = item;

      if (!this.options.pedantic) {
        // Determine if current item contains the end of the list
        // (a blank line followed by a non-space character preceded by at
        // most bcurr[0].length - 1 spaces).
        endMatch = item.match(new RegExp('\\n\\s*\\n {0,' + (bcurr[0].length - 1) + '}\\S'));
        if (endMatch) {
          // Drop everything after the detected end from the list's raw text.
          addBack = item.length - endMatch.index + itemMatch.slice(i + 1).join('\n').length;
          list.raw = list.raw.substring(0, list.raw.length - addBack);

          item = item.substring(0, endMatch.index);
          raw = item;
          // Make this the last item processed.
          l = i + 1;
        }
      }

      // Determine whether the next list item belongs here.
      // Backpedal if it does not belong in this list.
      if (i !== l - 1) {
        bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
        if (
          !this.options.pedantic
            ? bnext[1].length >= bcurr[0].length || bnext[1].length > 3
            : bnext[1].length > bcurr[1].length
        ) {
          // nested list or continuation
          // Merge the next entry into this one and re-process index i.
          itemMatch.splice(i, 2, itemMatch[i] + (!this.options.pedantic && bnext[1].length < bcurr[0].length && !itemMatch[i].match(/\n$/) ? '' : '\n') + itemMatch[i + 1]);
          i--;
          l--;
          continue;
        } else if (
          // different bullet style
          !this.options.pedantic || this.options.smartLists
            ? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
            : isordered === (bnext[2].length === 1)
        ) {
          // Cut the remaining entries out of list.raw and finish after this item.
          addBack = itemMatch.slice(i + 1).join('\n').length;
          list.raw = list.raw.substring(0, list.raw.length - addBack);
          i = l - 1;
        }
        bcurr = bnext;
      }

      // Remove the list item's bullet
      // so it is seen as the next token.
      space = item.length;
      item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');

      // Outdent whatever the
      // list item contains. Hacky.
      if (~item.indexOf('\n ')) {
        // `space` becomes the number of characters the bullet removal consumed.
        space -= item.length;
        item = !this.options.pedantic
          ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
          : item.replace(/^ {1,4}/gm, '');
      }

      // trim item newlines at end
      item = rtrim(item, '\n');
      if (i !== l - 1) {
        raw = raw + '\n';
      }

      // Determine whether item is loose or not.
      // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
      // for discount behavior.
      loose = next || /\n\n(?!\s*$)/.test(raw);
      if (i !== l - 1) {
        // Remember whether this item ends with a blank line for the next one.
        next = raw.slice(-2) === '\n\n';
        if (!loose) loose = next;
      }

      if (loose) {
        list.loose = true;
      }

      // Check for task list items
      // (istask and ischecked are only assigned when options.gfm is set).
      if (this.options.gfm) {
        istask = /^\[[ xX]\] /.test(item);
        ischecked = undefined;
        if (istask) {
          ischecked = item[1] !== ' ';
          item = item.replace(/^\[[ xX]\] +/, '');
        }
      }

      list.items.push({
        type: 'list_item',
        raw,
        task: istask,
        checked: ischecked,
        loose: loose,
        text: item
      });
    }

    return list;
  }
}
328
329 html(src) {
330 const cap = this.rules.block.html.exec(src);
331 if (cap) {
332 return {
333 type: this.options.sanitize
334 ? 'paragraph'
335 : 'html',
336 raw: cap[0],
337 pre: !this.options.sanitizer
338 && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
339 text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
340 };
341 }
342 }
343
344 def(src) {
345 const cap = this.rules.block.def.exec(src);
346 if (cap) {
347 if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
348 const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
349 return {
350 type: 'def',
351 tag,
352 raw: cap[0],
353 href: cap[2],
354 title: cap[3]
355 };
356 }
357 }
358
359 table(src) {
360 const cap = this.rules.block.table.exec(src);
361 if (cap) {
362 const item = {
363 type: 'table',
364 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
365 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
366 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
367 };
368
369 if (item.header.length === item.align.length) {
370 item.raw = cap[0];
371
372 let l = item.align.length;
373 let i;
374 for (i = 0; i < l; i++) {
375 if (/^ *-+: *$/.test(item.align[i])) {
376 item.align[i] = 'right';
377 } else if (/^ *:-+: *$/.test(item.align[i])) {
378 item.align[i] = 'center';
379 } else if (/^ *:-+ *$/.test(item.align[i])) {
380 item.align[i] = 'left';
381 } else {
382 item.align[i] = null;
383 }
384 }
385
386 l = item.cells.length;
387 for (i = 0; i < l; i++) {
388 item.cells[i] = splitCells(
389 item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
390 item.header.length);
391 }
392
393 return item;
394 }
395 }
396 }
397
398 lheading(src) {
399 const cap = this.rules.block.lheading.exec(src);
400 if (cap) {
401 return {
402 type: 'heading',
403 raw: cap[0],
404 depth: cap[2].charAt(0) === '=' ? 1 : 2,
405 text: cap[1]
406 };
407 }
408 }
409
410 paragraph(src) {
411 const cap = this.rules.block.paragraph.exec(src);
412 if (cap) {
413 return {
414 type: 'paragraph',
415 raw: cap[0],
416 text: cap[1].charAt(cap[1].length - 1) === '\n'
417 ? cap[1].slice(0, -1)
418 : cap[1]
419 };
420 }
421 }
422
423 text(src) {
424 const cap = this.rules.block.text.exec(src);
425 if (cap) {
426 return {
427 type: 'text',
428 raw: cap[0],
429 text: cap[0]
430 };
431 }
432 }
433
434 escape(src) {
435 const cap = this.rules.inline.escape.exec(src);
436 if (cap) {
437 return {
438 type: 'escape',
439 raw: cap[0],
440 text: escape(cap[1])
441 };
442 }
443 }
444
445 tag(src, inLink, inRawBlock) {
446 const cap = this.rules.inline.tag.exec(src);
447 if (cap) {
448 if (!inLink && /^<a /i.test(cap[0])) {
449 inLink = true;
450 } else if (inLink && /^<\/a>/i.test(cap[0])) {
451 inLink = false;
452 }
453 if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
454 inRawBlock = true;
455 } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
456 inRawBlock = false;
457 }
458
459 return {
460 type: this.options.sanitize
461 ? 'text'
462 : 'html',
463 raw: cap[0],
464 inLink,
465 inRawBlock,
466 text: this.options.sanitize
467 ? (this.options.sanitizer
468 ? this.options.sanitizer(cap[0])
469 : escape(cap[0]))
470 : cap[0]
471 };
472 }
473 }
474
475 link(src) {
476 const cap = this.rules.inline.link.exec(src);
477 if (cap) {
478 const trimmedUrl = cap[2].trim();
479 if (!this.options.pedantic && /^</.test(trimmedUrl)) {
480 // commonmark requires matching angle brackets
481 if (!(/>$/.test(trimmedUrl))) {
482 return;
483 }
484
485 // ending angle bracket cannot be escaped
486 const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
487 if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
488 return;
489 }
490 } else {
491 // find closing parenthesis
492 const lastParenIndex = findClosingBracket(cap[2], '()');
493 if (lastParenIndex > -1) {
494 const start = cap[0].indexOf('!') === 0 ? 5 : 4;
495 const linkLen = start + cap[1].length + lastParenIndex;
496 cap[2] = cap[2].substring(0, lastParenIndex);
497 cap[0] = cap[0].substring(0, linkLen).trim();
498 cap[3] = '';
499 }
500 }
501 let href = cap[2];
502 let title = '';
503 if (this.options.pedantic) {
504 // split pedantic href and title
505 const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
506
507 if (link) {
508 href = link[1];
509 title = link[3];
510 }
511 } else {
512 title = cap[3] ? cap[3].slice(1, -1) : '';
513 }
514
515 href = href.trim();
516 if (/^</.test(href)) {
517 if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
518 // pedantic allows starting angle bracket without ending angle bracket
519 href = href.slice(1);
520 } else {
521 href = href.slice(1, -1);
522 }
523 }
524 return outputLink(cap, {
525 href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
526 title: title ? title.replace(this.rules.inline._escapes, '$1') : title
527 }, cap[0]);
528 }
529 }
530
531 reflink(src, links) {
532 let cap;
533 if ((cap = this.rules.inline.reflink.exec(src))
534 || (cap = this.rules.inline.nolink.exec(src))) {
535 let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
536 link = links[link.toLowerCase()];
537 if (!link || !link.href) {
538 const text = cap[0].charAt(0);
539 return {
540 type: 'text',
541 raw: text,
542 text
543 };
544 }
545 return outputLink(cap, link, cap[0]);
546 }
547 }
548
/**
 * Tokenize emphasis ('em') or strong emphasis ('strong').
 *
 * @param {string} src - Remaining source text, starting at the candidate
 *   left delimiter.
 * @param {string} maskedSrc - Text that is scanned for the closing delimiter;
 *   it is re-sliced below using the length of `src`.
 *   NOTE(review): presumably a masked copy of the source prepared by the
 *   caller - confirm.
 * @param {string} [prevChar=''] - Character preceding `src`.
 * @returns {Object|undefined} An 'em' or 'strong' token, or undefined when no
 *   left delimiter matches or no suitable closing delimiter is found.
 */
emStrong(src, maskedSrc, prevChar = '') {
  let match = this.rules.inline.emStrong.lDelim.exec(src);
  if (!match) return;

  if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return; // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well

  const nextChar = match[1] || match[2] || '';

  if (!nextChar || (nextChar && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
    // Length of the opening delimiter run (the match minus one character).
    const lLength = match[0].length - 1;
    let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;

    // Pick the closing-delimiter pattern that matches the opening character.
    const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
    // Restart the scan from the beginning of the string on every call.
    endReg.lastIndex = 0;

    maskedSrc = maskedSrc.slice(-1 * src.length + lLength); // Bump maskedSrc to same section of string as src (move to lexer?)

    while ((match = endReg.exec(maskedSrc)) != null) {
      rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6];

      if (!rDelim) continue; // matched the first alternative in rules.js (skip the * in __abc*abc__)

      rLength = rDelim.length;

      if (match[3] || match[4]) { // found another Left Delim
        delimTotal += rLength;
        continue;
      } else if (match[5] || match[6]) { // either Left or Right Delim
        if (lLength % 3 && !((lLength + rLength) % 3)) {
          midDelimTotal += rLength;
          continue; // CommonMark Emphasis Rules 9-10
        }
      }

      delimTotal -= rLength;

      if (delimTotal > 0) continue; // Haven't found enough closing delimiters

      // If this is the last rDelimiter, remove extra characters. *a*** -> *a*
      if (delimTotal + midDelimTotal - rLength <= 0 && !maskedSrc.slice(endReg.lastIndex).match(endReg)) {
        rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
      }

      // An odd shorter run yields 'em'; an even one yields 'strong'.
      if (Math.min(lLength, rLength) % 2) {
        return {
          type: 'em',
          raw: src.slice(0, lLength + match.index + rLength + 1),
          text: src.slice(1, lLength + match.index + rLength)
        };
      }
      // Complement of the check above.
      if (Math.min(lLength, rLength) % 2 === 0) {
        return {
          type: 'strong',
          raw: src.slice(0, lLength + match.index + rLength + 1),
          text: src.slice(2, lLength + match.index + rLength - 1)
        };
      }
    }
  }
}
609
610 codespan(src) {
611 const cap = this.rules.inline.code.exec(src);
612 if (cap) {
613 let text = cap[2].replace(/\n/g, ' ');
614 const hasNonSpaceChars = /[^ ]/.test(text);
615 const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
616 if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
617 text = text.substring(1, text.length - 1);
618 }
619 text = escape(text, true);
620 return {
621 type: 'codespan',
622 raw: cap[0],
623 text
624 };
625 }
626 }
627
628 br(src) {
629 const cap = this.rules.inline.br.exec(src);
630 if (cap) {
631 return {
632 type: 'br',
633 raw: cap[0]
634 };
635 }
636 }
637
638 del(src) {
639 const cap = this.rules.inline.del.exec(src);
640 if (cap) {
641 return {
642 type: 'del',
643 raw: cap[0],
644 text: cap[2]
645 };
646 }
647 }
648
649 autolink(src, mangle) {
650 const cap = this.rules.inline.autolink.exec(src);
651 if (cap) {
652 let text, href;
653 if (cap[2] === '@') {
654 text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
655 href = 'mailto:' + text;
656 } else {
657 text = escape(cap[1]);
658 href = text;
659 }
660
661 return {
662 type: 'link',
663 raw: cap[0],
664 text,
665 href,
666 tokens: [
667 {
668 type: 'text',
669 raw: text,
670 text
671 }
672 ]
673 };
674 }
675 }
676
677 url(src, mangle) {
678 let cap;
679 if (cap = this.rules.inline.url.exec(src)) {
680 let text, href;
681 if (cap[2] === '@') {
682 text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
683 href = 'mailto:' + text;
684 } else {
685 // do extended autolink path validation
686 let prevCapZero;
687 do {
688 prevCapZero = cap[0];
689 cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
690 } while (prevCapZero !== cap[0]);
691 text = escape(cap[0]);
692 if (cap[1] === 'www.') {
693 href = 'http://' + text;
694 } else {
695 href = text;
696 }
697 }
698 return {
699 type: 'link',
700 raw: cap[0],
701 text,
702 href,
703 tokens: [
704 {
705 type: 'text',
706 raw: text,
707 text
708 }
709 ]
710 };
711 }
712 }
713
714 inlineText(src, inRawBlock, smartypants) {
715 const cap = this.rules.inline.text.exec(src);
716 if (cap) {
717 let text;
718 if (inRawBlock) {
719 text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
720 } else {
721 text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
722 }
723 return {
724 type: 'text',
725 raw: cap[0],
726 text
727 };
728 }
729 }
730};