1 | const { defaults } = require('./defaults.js');
|
2 | const {
|
3 | rtrim,
|
4 | splitCells,
|
5 | escape,
|
6 | findClosingBracket
|
7 | } = require('./helpers.js');
|
8 |
|
/**
 * Build a `link` or `image` token from a regex capture and a resolved target.
 *
 * The token is an image when the full match (`cap[0]`) starts with `!`,
 * otherwise it is a link.
 *
 * @param {Array<string>} cap - Capture array; `cap[0]` is the full match and
 *   `cap[1]` is the bracketed label.
 * @param {{href: string, title: ?string}} link - Target of the link; a falsy
 *   `title` becomes `null` on the token, a truthy one is passed through `escape`.
 * @param {string} raw - Stored unchanged as the token's `raw` field.
 * @returns {{type: string, raw: string, href: string, title: ?string, text: string}}
 *   The link/image token. Only image tokens have their `text` passed through
 *   `escape`.
 */
function outputLink(cap, link, raw) {
  const { href } = link;
  const title = link.title ? escape(link.title) : null;
  // Drop the backslash in front of escaped square brackets inside the label.
  const label = cap[1].replace(/\\([\[\]])/g, '$1');
  const isImage = cap[0].charAt(0) === '!';

  return {
    type: isImage ? 'image' : 'link',
    raw,
    href,
    title,
    text: isImage ? escape(label) : label
  };
}
|
32 |
|
/**
 * Remove from each line of `text` the indentation that precedes the opening
 * triple-backtick fence in `raw`.
 *
 * When `raw` does not start with whitespace followed by three backticks,
 * `text` is returned untouched. Otherwise every line whose leading whitespace
 * is at least as long as the fence indentation loses that many leading
 * characters; shorter-indented and unindented lines are kept as they are.
 *
 * @param {string} raw - Full source of the fenced block, including the fence.
 * @param {string} text - Body of the fenced block.
 * @returns {string} The body with the fence indentation compensated.
 */
function indentCodeCompensation(raw, text) {
  const fenceIndent = raw.match(/^(\s+)(?:```)/);
  if (fenceIndent === null) {
    return text;
  }

  const indentLength = fenceIndent[1].length;
  const adjusted = [];

  for (const line of text.split('\n')) {
    const leading = line.match(/^\s+/);
    const canStrip = leading !== null && leading[0].length >= indentLength;
    adjusted.push(canStrip ? line.slice(indentLength) : line);
  }

  return adjusted.join('\n');
}
|
60 |
|
61 |
|
62 |
|
63 |
|
64 | module.exports = class Tokenizer {
|
65 | constructor(options) {
|
66 | this.options = options || defaults;
|
67 | }
|
68 |
|
69 | space(src) {
|
70 | const cap = this.rules.block.newline.exec(src);
|
71 | if (cap) {
|
72 | if (cap[0].length > 1) {
|
73 | return {
|
74 | type: 'space',
|
75 | raw: cap[0]
|
76 | };
|
77 | }
|
78 | return { raw: '\n' };
|
79 | }
|
80 | }
|
81 |
|
82 | code(src, tokens) {
|
83 | const cap = this.rules.block.code.exec(src);
|
84 | if (cap) {
|
85 | const lastToken = tokens[tokens.length - 1];
|
86 |
|
87 | if (lastToken && lastToken.type === 'paragraph') {
|
88 | return {
|
89 | raw: cap[0],
|
90 | text: cap[0].trimRight()
|
91 | };
|
92 | }
|
93 |
|
94 | const text = cap[0].replace(/^ {4}/gm, '');
|
95 | return {
|
96 | type: 'code',
|
97 | raw: cap[0],
|
98 | codeBlockStyle: 'indented',
|
99 | text: !this.options.pedantic
|
100 | ? rtrim(text, '\n')
|
101 | : text
|
102 | };
|
103 | }
|
104 | }
|
105 |
|
106 | fences(src) {
|
107 | const cap = this.rules.block.fences.exec(src);
|
108 | if (cap) {
|
109 | const raw = cap[0];
|
110 | const text = indentCodeCompensation(raw, cap[3] || '');
|
111 |
|
112 | return {
|
113 | type: 'code',
|
114 | raw,
|
115 | lang: cap[2] ? cap[2].trim() : cap[2],
|
116 | text
|
117 | };
|
118 | }
|
119 | }
|
120 |
|
121 | heading(src) {
|
122 | const cap = this.rules.block.heading.exec(src);
|
123 | if (cap) {
|
124 | let text = cap[2].trim();
|
125 |
|
126 |
|
127 | if (/#$/.test(text)) {
|
128 | const trimmed = rtrim(text, '#');
|
129 | if (this.options.pedantic) {
|
130 | text = trimmed.trim();
|
131 | } else if (!trimmed || / $/.test(trimmed)) {
|
132 |
|
133 | text = trimmed.trim();
|
134 | }
|
135 | }
|
136 |
|
137 | return {
|
138 | type: 'heading',
|
139 | raw: cap[0],
|
140 | depth: cap[1].length,
|
141 | text: text
|
142 | };
|
143 | }
|
144 | }
|
145 |
|
146 | nptable(src) {
|
147 | const cap = this.rules.block.nptable.exec(src);
|
148 | if (cap) {
|
149 | const item = {
|
150 | type: 'table',
|
151 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
152 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
153 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
|
154 | raw: cap[0]
|
155 | };
|
156 |
|
157 | if (item.header.length === item.align.length) {
|
158 | let l = item.align.length;
|
159 | let i;
|
160 | for (i = 0; i < l; i++) {
|
161 | if (/^ *-+: *$/.test(item.align[i])) {
|
162 | item.align[i] = 'right';
|
163 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
164 | item.align[i] = 'center';
|
165 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
166 | item.align[i] = 'left';
|
167 | } else {
|
168 | item.align[i] = null;
|
169 | }
|
170 | }
|
171 |
|
172 | l = item.cells.length;
|
173 | for (i = 0; i < l; i++) {
|
174 | item.cells[i] = splitCells(item.cells[i], item.header.length);
|
175 | }
|
176 |
|
177 | return item;
|
178 | }
|
179 | }
|
180 | }
|
181 |
|
182 | hr(src) {
|
183 | const cap = this.rules.block.hr.exec(src);
|
184 | if (cap) {
|
185 | return {
|
186 | type: 'hr',
|
187 | raw: cap[0]
|
188 | };
|
189 | }
|
190 | }
|
191 |
|
192 | blockquote(src) {
|
193 | const cap = this.rules.block.blockquote.exec(src);
|
194 | if (cap) {
|
195 | const text = cap[0].replace(/^ *> ?/gm, '');
|
196 |
|
197 | return {
|
198 | type: 'blockquote',
|
199 | raw: cap[0],
|
200 | text
|
201 | };
|
202 | }
|
203 | }
|
204 |
|
205 | list(src) {
|
206 | const cap = this.rules.block.list.exec(src);
|
207 | if (cap) {
|
208 | let raw = cap[0];
|
209 | const bull = cap[2];
|
210 | const isordered = bull.length > 1;
|
211 |
|
212 | const list = {
|
213 | type: 'list',
|
214 | raw,
|
215 | ordered: isordered,
|
216 | start: isordered ? +bull.slice(0, -1) : '',
|
217 | loose: false,
|
218 | items: []
|
219 | };
|
220 |
|
221 |
|
222 | const itemMatch = cap[0].match(this.rules.block.item);
|
223 |
|
224 | let next = false,
|
225 | item,
|
226 | space,
|
227 | bcurr,
|
228 | bnext,
|
229 | addBack,
|
230 | loose,
|
231 | istask,
|
232 | ischecked;
|
233 |
|
234 | let l = itemMatch.length;
|
235 | bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
|
236 | for (let i = 0; i < l; i++) {
|
237 | item = itemMatch[i];
|
238 | raw = item;
|
239 |
|
240 |
|
241 |
|
242 | if (i !== l - 1) {
|
243 | bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
|
244 |
|
245 | if (bnext[1].length > bcurr[0].length || bnext[1].length > 3) {
|
246 |
|
247 | itemMatch.splice(i, 2, itemMatch[i] + '\n' + itemMatch[i + 1]);
|
248 | i--;
|
249 | l--;
|
250 | continue;
|
251 | } else {
|
252 | if (
|
253 |
|
254 | !this.options.pedantic || this.options.smartLists
|
255 | ? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
|
256 | : isordered === (bnext[2].length === 1)
|
257 | ) {
|
258 | addBack = itemMatch.slice(i + 1).join('\n');
|
259 | list.raw = list.raw.substring(0, list.raw.length - addBack.length);
|
260 | i = l - 1;
|
261 | }
|
262 | }
|
263 | bcurr = bnext;
|
264 | }
|
265 |
|
266 |
|
267 |
|
268 | space = item.length;
|
269 | item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');
|
270 |
|
271 |
|
272 |
|
273 | if (~item.indexOf('\n ')) {
|
274 | space -= item.length;
|
275 | item = !this.options.pedantic
|
276 | ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
|
277 | : item.replace(/^ {1,4}/gm, '');
|
278 | }
|
279 |
|
280 |
|
281 |
|
282 |
|
283 | loose = next || /\n\n(?!\s*$)/.test(item);
|
284 | if (i !== l - 1) {
|
285 | next = item.charAt(item.length - 1) === '\n';
|
286 | if (!loose) loose = next;
|
287 | }
|
288 |
|
289 | if (loose) {
|
290 | list.loose = true;
|
291 | }
|
292 |
|
293 |
|
294 | if (this.options.gfm) {
|
295 | istask = /^\[[ xX]\] /.test(item);
|
296 | ischecked = undefined;
|
297 | if (istask) {
|
298 | ischecked = item[1] !== ' ';
|
299 | item = item.replace(/^\[[ xX]\] +/, '');
|
300 | }
|
301 | }
|
302 |
|
303 | list.items.push({
|
304 | type: 'list_item',
|
305 | raw,
|
306 | task: istask,
|
307 | checked: ischecked,
|
308 | loose: loose,
|
309 | text: item
|
310 | });
|
311 | }
|
312 |
|
313 | return list;
|
314 | }
|
315 | }
|
316 |
|
317 | html(src) {
|
318 | const cap = this.rules.block.html.exec(src);
|
319 | if (cap) {
|
320 | return {
|
321 | type: this.options.sanitize
|
322 | ? 'paragraph'
|
323 | : 'html',
|
324 | raw: cap[0],
|
325 | pre: !this.options.sanitizer
|
326 | && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
|
327 | text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
|
328 | };
|
329 | }
|
330 | }
|
331 |
|
332 | def(src) {
|
333 | const cap = this.rules.block.def.exec(src);
|
334 | if (cap) {
|
335 | if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
|
336 | const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
337 | return {
|
338 | tag,
|
339 | raw: cap[0],
|
340 | href: cap[2],
|
341 | title: cap[3]
|
342 | };
|
343 | }
|
344 | }
|
345 |
|
346 | table(src) {
|
347 | const cap = this.rules.block.table.exec(src);
|
348 | if (cap) {
|
349 | const item = {
|
350 | type: 'table',
|
351 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
352 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
353 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
354 | };
|
355 |
|
356 | if (item.header.length === item.align.length) {
|
357 | item.raw = cap[0];
|
358 |
|
359 | let l = item.align.length;
|
360 | let i;
|
361 | for (i = 0; i < l; i++) {
|
362 | if (/^ *-+: *$/.test(item.align[i])) {
|
363 | item.align[i] = 'right';
|
364 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
365 | item.align[i] = 'center';
|
366 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
367 | item.align[i] = 'left';
|
368 | } else {
|
369 | item.align[i] = null;
|
370 | }
|
371 | }
|
372 |
|
373 | l = item.cells.length;
|
374 | for (i = 0; i < l; i++) {
|
375 | item.cells[i] = splitCells(
|
376 | item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
|
377 | item.header.length);
|
378 | }
|
379 |
|
380 | return item;
|
381 | }
|
382 | }
|
383 | }
|
384 |
|
385 | lheading(src) {
|
386 | const cap = this.rules.block.lheading.exec(src);
|
387 | if (cap) {
|
388 | return {
|
389 | type: 'heading',
|
390 | raw: cap[0],
|
391 | depth: cap[2].charAt(0) === '=' ? 1 : 2,
|
392 | text: cap[1]
|
393 | };
|
394 | }
|
395 | }
|
396 |
|
397 | paragraph(src) {
|
398 | const cap = this.rules.block.paragraph.exec(src);
|
399 | if (cap) {
|
400 | return {
|
401 | type: 'paragraph',
|
402 | raw: cap[0],
|
403 | text: cap[1].charAt(cap[1].length - 1) === '\n'
|
404 | ? cap[1].slice(0, -1)
|
405 | : cap[1]
|
406 | };
|
407 | }
|
408 | }
|
409 |
|
410 | text(src, tokens) {
|
411 | const cap = this.rules.block.text.exec(src);
|
412 | if (cap) {
|
413 | const lastToken = tokens[tokens.length - 1];
|
414 | if (lastToken && lastToken.type === 'text') {
|
415 | return {
|
416 | raw: cap[0],
|
417 | text: cap[0]
|
418 | };
|
419 | }
|
420 |
|
421 | return {
|
422 | type: 'text',
|
423 | raw: cap[0],
|
424 | text: cap[0]
|
425 | };
|
426 | }
|
427 | }
|
428 |
|
429 | escape(src) {
|
430 | const cap = this.rules.inline.escape.exec(src);
|
431 | if (cap) {
|
432 | return {
|
433 | type: 'escape',
|
434 | raw: cap[0],
|
435 | text: escape(cap[1])
|
436 | };
|
437 | }
|
438 | }
|
439 |
|
440 | tag(src, inLink, inRawBlock) {
|
441 | const cap = this.rules.inline.tag.exec(src);
|
442 | if (cap) {
|
443 | if (!inLink && /^<a /i.test(cap[0])) {
|
444 | inLink = true;
|
445 | } else if (inLink && /^<\/a>/i.test(cap[0])) {
|
446 | inLink = false;
|
447 | }
|
448 | if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
449 | inRawBlock = true;
|
450 | } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
451 | inRawBlock = false;
|
452 | }
|
453 |
|
454 | return {
|
455 | type: this.options.sanitize
|
456 | ? 'text'
|
457 | : 'html',
|
458 | raw: cap[0],
|
459 | inLink,
|
460 | inRawBlock,
|
461 | text: this.options.sanitize
|
462 | ? (this.options.sanitizer
|
463 | ? this.options.sanitizer(cap[0])
|
464 | : escape(cap[0]))
|
465 | : cap[0]
|
466 | };
|
467 | }
|
468 | }
|
469 |
|
470 | link(src) {
|
471 | const cap = this.rules.inline.link.exec(src);
|
472 | if (cap) {
|
473 | const trimmedUrl = cap[2].trim();
|
474 | if (!this.options.pedantic && /^</.test(trimmedUrl)) {
|
475 |
|
476 | if (!(/>$/.test(trimmedUrl))) {
|
477 | return;
|
478 | }
|
479 |
|
480 |
|
481 | const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
|
482 | if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
|
483 | return;
|
484 | }
|
485 | } else {
|
486 |
|
487 | const lastParenIndex = findClosingBracket(cap[2], '()');
|
488 | if (lastParenIndex > -1) {
|
489 | const start = cap[0].indexOf('!') === 0 ? 5 : 4;
|
490 | const linkLen = start + cap[1].length + lastParenIndex;
|
491 | cap[2] = cap[2].substring(0, lastParenIndex);
|
492 | cap[0] = cap[0].substring(0, linkLen).trim();
|
493 | cap[3] = '';
|
494 | }
|
495 | }
|
496 | let href = cap[2];
|
497 | let title = '';
|
498 | if (this.options.pedantic) {
|
499 |
|
500 | const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
|
501 |
|
502 | if (link) {
|
503 | href = link[1];
|
504 | title = link[3];
|
505 | }
|
506 | } else {
|
507 | title = cap[3] ? cap[3].slice(1, -1) : '';
|
508 | }
|
509 |
|
510 | href = href.trim();
|
511 | if (/^</.test(href)) {
|
512 | if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
|
513 |
|
514 | href = href.slice(1);
|
515 | } else {
|
516 | href = href.slice(1, -1);
|
517 | }
|
518 | }
|
519 | return outputLink(cap, {
|
520 | href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
|
521 | title: title ? title.replace(this.rules.inline._escapes, '$1') : title
|
522 | }, cap[0]);
|
523 | }
|
524 | }
|
525 |
|
526 | reflink(src, links) {
|
527 | let cap;
|
528 | if ((cap = this.rules.inline.reflink.exec(src))
|
529 | || (cap = this.rules.inline.nolink.exec(src))) {
|
530 | let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
|
531 | link = links[link.toLowerCase()];
|
532 | if (!link || !link.href) {
|
533 | const text = cap[0].charAt(0);
|
534 | return {
|
535 | type: 'text',
|
536 | raw: text,
|
537 | text
|
538 | };
|
539 | }
|
540 | return outputLink(cap, link, cap[0]);
|
541 | }
|
542 | }
|
543 |
|
544 | strong(src, maskedSrc, prevChar = '') {
|
545 | let match = this.rules.inline.strong.start.exec(src);
|
546 |
|
547 | if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
|
548 | maskedSrc = maskedSrc.slice(-1 * src.length);
|
549 | const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;
|
550 |
|
551 | endReg.lastIndex = 0;
|
552 |
|
553 | let cap;
|
554 | while ((match = endReg.exec(maskedSrc)) != null) {
|
555 | cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
|
556 | if (cap) {
|
557 | return {
|
558 | type: 'strong',
|
559 | raw: src.slice(0, cap[0].length),
|
560 | text: src.slice(2, cap[0].length - 2)
|
561 | };
|
562 | }
|
563 | }
|
564 | }
|
565 | }
|
566 |
|
567 | em(src, maskedSrc, prevChar = '') {
|
568 | let match = this.rules.inline.em.start.exec(src);
|
569 |
|
570 | if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
|
571 | maskedSrc = maskedSrc.slice(-1 * src.length);
|
572 | const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;
|
573 |
|
574 | endReg.lastIndex = 0;
|
575 |
|
576 | let cap;
|
577 | while ((match = endReg.exec(maskedSrc)) != null) {
|
578 | cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
|
579 | if (cap) {
|
580 | return {
|
581 | type: 'em',
|
582 | raw: src.slice(0, cap[0].length),
|
583 | text: src.slice(1, cap[0].length - 1)
|
584 | };
|
585 | }
|
586 | }
|
587 | }
|
588 | }
|
589 |
|
590 | codespan(src) {
|
591 | const cap = this.rules.inline.code.exec(src);
|
592 | if (cap) {
|
593 | let text = cap[2].replace(/\n/g, ' ');
|
594 | const hasNonSpaceChars = /[^ ]/.test(text);
|
595 | const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
|
596 | if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
|
597 | text = text.substring(1, text.length - 1);
|
598 | }
|
599 | text = escape(text, true);
|
600 | return {
|
601 | type: 'codespan',
|
602 | raw: cap[0],
|
603 | text
|
604 | };
|
605 | }
|
606 | }
|
607 |
|
608 | br(src) {
|
609 | const cap = this.rules.inline.br.exec(src);
|
610 | if (cap) {
|
611 | return {
|
612 | type: 'br',
|
613 | raw: cap[0]
|
614 | };
|
615 | }
|
616 | }
|
617 |
|
618 | del(src) {
|
619 | const cap = this.rules.inline.del.exec(src);
|
620 | if (cap) {
|
621 | return {
|
622 | type: 'del',
|
623 | raw: cap[0],
|
624 | text: cap[2]
|
625 | };
|
626 | }
|
627 | }
|
628 |
|
629 | autolink(src, mangle) {
|
630 | const cap = this.rules.inline.autolink.exec(src);
|
631 | if (cap) {
|
632 | let text, href;
|
633 | if (cap[2] === '@') {
|
634 | text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
|
635 | href = 'mailto:' + text;
|
636 | } else {
|
637 | text = escape(cap[1]);
|
638 | href = text;
|
639 | }
|
640 |
|
641 | return {
|
642 | type: 'link',
|
643 | raw: cap[0],
|
644 | text,
|
645 | href,
|
646 | tokens: [
|
647 | {
|
648 | type: 'text',
|
649 | raw: text,
|
650 | text
|
651 | }
|
652 | ]
|
653 | };
|
654 | }
|
655 | }
|
656 |
|
657 | url(src, mangle) {
|
658 | let cap;
|
659 | if (cap = this.rules.inline.url.exec(src)) {
|
660 | let text, href;
|
661 | if (cap[2] === '@') {
|
662 | text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
|
663 | href = 'mailto:' + text;
|
664 | } else {
|
665 |
|
666 | let prevCapZero;
|
667 | do {
|
668 | prevCapZero = cap[0];
|
669 | cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
|
670 | } while (prevCapZero !== cap[0]);
|
671 | text = escape(cap[0]);
|
672 | if (cap[1] === 'www.') {
|
673 | href = 'http://' + text;
|
674 | } else {
|
675 | href = text;
|
676 | }
|
677 | }
|
678 | return {
|
679 | type: 'link',
|
680 | raw: cap[0],
|
681 | text,
|
682 | href,
|
683 | tokens: [
|
684 | {
|
685 | type: 'text',
|
686 | raw: text,
|
687 | text
|
688 | }
|
689 | ]
|
690 | };
|
691 | }
|
692 | }
|
693 |
|
694 | inlineText(src, inRawBlock, smartypants) {
|
695 | const cap = this.rules.inline.text.exec(src);
|
696 | if (cap) {
|
697 | let text;
|
698 | if (inRawBlock) {
|
699 | text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
|
700 | } else {
|
701 | text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
|
702 | }
|
703 | return {
|
704 | type: 'text',
|
705 | raw: cap[0],
|
706 | text
|
707 | };
|
708 | }
|
709 | }
|
710 | };
|