1 | const { defaults } = require('./defaults.js');
|
2 | const {
|
3 | rtrim,
|
4 | splitCells,
|
5 | escape,
|
6 | findClosingBracket
|
7 | } = require('./helpers.js');
|
8 |
|
/**
 * Build a `link` or `image` token from a regex capture and a resolved link.
 *
 * @param {Array<string>} cap - Capture array; `cap[0]` is the full match (a
 *   leading `!` selects the image form) and `cap[1]` is the bracketed label.
 * @param {{href: string, title: ?string}} link - Destination and optional title.
 * @param {string} raw - Source text to record on the token.
 * @returns {{type: string, raw: string, href: string, title: ?string, text: string}}
 *   The token. `title` is passed through `escape()` when truthy, otherwise it
 *   is `null`. `text` is passed through `escape()` for images only.
 */
function outputLink(cap, link, raw) {
  const href = link.href;
  const title = link.title ? escape(link.title) : null;
  // Turn backslash-escaped square brackets in the label back into plain brackets.
  const text = cap[1].replace(/\\([\[\]])/g, '$1');
  const isImage = cap[0].charAt(0) === '!';

  return {
    type: isImage ? 'image' : 'link',
    raw,
    href,
    title,
    text: isImage ? escape(text) : text
  };
}
|
32 |
|
/**
 * Remove from each line of a fenced code body the indentation that precedes
 * the opening fence.
 *
 * @param {string} raw - The whole fenced block, starting at its first character.
 * @param {string} text - The code body found between the fences.
 * @returns {string} `text` unchanged when `raw` does not begin with whitespace
 *   followed by three backticks. Otherwise `text` with every line whose leading
 *   whitespace is at least as long as the fence's shortened by that many
 *   characters; lines with less (or no) leading whitespace are kept as they are.
 */
function indentCodeCompensation(raw, text) {
  const matchIndentToCode = raw.match(/^(\s+)(?:```)/);

  if (matchIndentToCode === null) {
    return text;
  }

  const indentToCode = matchIndentToCode[1];

  return text
    .split('\n')
    .map(node => {
      const matchIndentInNode = node.match(/^\s+/);
      if (matchIndentInNode === null) {
        return node;
      }

      const [indentInNode] = matchIndentInNode;

      // Only outdent lines indented at least as far as the fence itself.
      if (indentInNode.length >= indentToCode.length) {
        return node.slice(indentToCode.length);
      }

      return node;
    })
    .join('\n');
}
|
60 |
|
61 |
|
62 |
|
63 |
|
64 | module.exports = class Tokenizer {
|
65 | constructor(options) {
|
66 | this.options = options || defaults;
|
67 | }
|
68 |
|
69 | space(src) {
|
70 | const cap = this.rules.block.newline.exec(src);
|
71 | if (cap) {
|
72 | if (cap[0].length > 1) {
|
73 | return {
|
74 | type: 'space',
|
75 | raw: cap[0]
|
76 | };
|
77 | }
|
78 | return { raw: '\n' };
|
79 | }
|
80 | }
|
81 |
|
82 | code(src, tokens) {
|
83 | const cap = this.rules.block.code.exec(src);
|
84 | if (cap) {
|
85 | const lastToken = tokens[tokens.length - 1];
|
86 |
|
87 | if (lastToken && lastToken.type === 'paragraph') {
|
88 | return {
|
89 | raw: cap[0],
|
90 | text: cap[0].trimRight()
|
91 | };
|
92 | }
|
93 |
|
94 | const text = cap[0].replace(/^ {1,4}/gm, '');
|
95 | return {
|
96 | type: 'code',
|
97 | raw: cap[0],
|
98 | codeBlockStyle: 'indented',
|
99 | text: !this.options.pedantic
|
100 | ? rtrim(text, '\n')
|
101 | : text
|
102 | };
|
103 | }
|
104 | }
|
105 |
|
106 | fences(src) {
|
107 | const cap = this.rules.block.fences.exec(src);
|
108 | if (cap) {
|
109 | const raw = cap[0];
|
110 | const text = indentCodeCompensation(raw, cap[3] || '');
|
111 |
|
112 | return {
|
113 | type: 'code',
|
114 | raw,
|
115 | lang: cap[2] ? cap[2].trim() : cap[2],
|
116 | text
|
117 | };
|
118 | }
|
119 | }
|
120 |
|
121 | heading(src) {
|
122 | const cap = this.rules.block.heading.exec(src);
|
123 | if (cap) {
|
124 | let text = cap[2].trim();
|
125 |
|
126 |
|
127 | if (/#$/.test(text)) {
|
128 | const trimmed = rtrim(text, '#');
|
129 | if (this.options.pedantic) {
|
130 | text = trimmed.trim();
|
131 | } else if (!trimmed || / $/.test(trimmed)) {
|
132 |
|
133 | text = trimmed.trim();
|
134 | }
|
135 | }
|
136 |
|
137 | return {
|
138 | type: 'heading',
|
139 | raw: cap[0],
|
140 | depth: cap[1].length,
|
141 | text: text
|
142 | };
|
143 | }
|
144 | }
|
145 |
|
146 | nptable(src) {
|
147 | const cap = this.rules.block.nptable.exec(src);
|
148 | if (cap) {
|
149 | const item = {
|
150 | type: 'table',
|
151 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
152 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
153 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
|
154 | raw: cap[0]
|
155 | };
|
156 |
|
157 | if (item.header.length === item.align.length) {
|
158 | let l = item.align.length;
|
159 | let i;
|
160 | for (i = 0; i < l; i++) {
|
161 | if (/^ *-+: *$/.test(item.align[i])) {
|
162 | item.align[i] = 'right';
|
163 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
164 | item.align[i] = 'center';
|
165 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
166 | item.align[i] = 'left';
|
167 | } else {
|
168 | item.align[i] = null;
|
169 | }
|
170 | }
|
171 |
|
172 | l = item.cells.length;
|
173 | for (i = 0; i < l; i++) {
|
174 | item.cells[i] = splitCells(item.cells[i], item.header.length);
|
175 | }
|
176 |
|
177 | return item;
|
178 | }
|
179 | }
|
180 | }
|
181 |
|
182 | hr(src) {
|
183 | const cap = this.rules.block.hr.exec(src);
|
184 | if (cap) {
|
185 | return {
|
186 | type: 'hr',
|
187 | raw: cap[0]
|
188 | };
|
189 | }
|
190 | }
|
191 |
|
192 | blockquote(src) {
|
193 | const cap = this.rules.block.blockquote.exec(src);
|
194 | if (cap) {
|
195 | const text = cap[0].replace(/^ *> ?/gm, '');
|
196 |
|
197 | return {
|
198 | type: 'blockquote',
|
199 | raw: cap[0],
|
200 | text
|
201 | };
|
202 | }
|
203 | }
|
204 |
|
205 | list(src) {
|
206 | const cap = this.rules.block.list.exec(src);
|
207 | if (cap) {
|
208 | let raw = cap[0];
|
209 | const bull = cap[2];
|
210 | const isordered = bull.length > 1;
|
211 |
|
212 | const list = {
|
213 | type: 'list',
|
214 | raw,
|
215 | ordered: isordered,
|
216 | start: isordered ? +bull.slice(0, -1) : '',
|
217 | loose: false,
|
218 | items: []
|
219 | };
|
220 |
|
221 |
|
222 | const itemMatch = cap[0].match(this.rules.block.item);
|
223 |
|
224 | let next = false,
|
225 | item,
|
226 | space,
|
227 | bcurr,
|
228 | bnext,
|
229 | addBack,
|
230 | loose,
|
231 | istask,
|
232 | ischecked;
|
233 |
|
234 | let l = itemMatch.length;
|
235 | bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
|
236 | for (let i = 0; i < l; i++) {
|
237 | item = itemMatch[i];
|
238 | raw = item;
|
239 |
|
240 |
|
241 |
|
242 | if (i !== l - 1) {
|
243 | bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
|
244 | if (
|
245 | !this.options.pedantic
|
246 | ? bnext[1].length > bcurr[0].length || bnext[1].length > 3
|
247 | : bnext[1].length > bcurr[1].length
|
248 | ) {
|
249 |
|
250 | itemMatch.splice(i, 2, itemMatch[i] + '\n' + itemMatch[i + 1]);
|
251 | i--;
|
252 | l--;
|
253 | continue;
|
254 | } else {
|
255 | if (
|
256 |
|
257 | !this.options.pedantic || this.options.smartLists
|
258 | ? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
|
259 | : isordered === (bnext[2].length === 1)
|
260 | ) {
|
261 | addBack = itemMatch.slice(i + 1).join('\n');
|
262 | list.raw = list.raw.substring(0, list.raw.length - addBack.length);
|
263 | i = l - 1;
|
264 | }
|
265 | }
|
266 | bcurr = bnext;
|
267 | }
|
268 |
|
269 |
|
270 |
|
271 | space = item.length;
|
272 | item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');
|
273 |
|
274 |
|
275 |
|
276 | if (~item.indexOf('\n ')) {
|
277 | space -= item.length;
|
278 | item = !this.options.pedantic
|
279 | ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
|
280 | : item.replace(/^ {1,4}/gm, '');
|
281 | }
|
282 |
|
283 |
|
284 |
|
285 |
|
286 | loose = next || /\n\n(?!\s*$)/.test(item);
|
287 | if (i !== l - 1) {
|
288 | next = item.charAt(item.length - 1) === '\n';
|
289 | if (!loose) loose = next;
|
290 | }
|
291 |
|
292 | if (loose) {
|
293 | list.loose = true;
|
294 | }
|
295 |
|
296 |
|
297 | if (this.options.gfm) {
|
298 | istask = /^\[[ xX]\] /.test(item);
|
299 | ischecked = undefined;
|
300 | if (istask) {
|
301 | ischecked = item[1] !== ' ';
|
302 | item = item.replace(/^\[[ xX]\] +/, '');
|
303 | }
|
304 | }
|
305 |
|
306 | list.items.push({
|
307 | type: 'list_item',
|
308 | raw,
|
309 | task: istask,
|
310 | checked: ischecked,
|
311 | loose: loose,
|
312 | text: item
|
313 | });
|
314 | }
|
315 |
|
316 | return list;
|
317 | }
|
318 | }
|
319 |
|
320 | html(src) {
|
321 | const cap = this.rules.block.html.exec(src);
|
322 | if (cap) {
|
323 | return {
|
324 | type: this.options.sanitize
|
325 | ? 'paragraph'
|
326 | : 'html',
|
327 | raw: cap[0],
|
328 | pre: !this.options.sanitizer
|
329 | && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
|
330 | text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
|
331 | };
|
332 | }
|
333 | }
|
334 |
|
335 | def(src) {
|
336 | const cap = this.rules.block.def.exec(src);
|
337 | if (cap) {
|
338 | if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
|
339 | const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
340 | return {
|
341 | tag,
|
342 | raw: cap[0],
|
343 | href: cap[2],
|
344 | title: cap[3]
|
345 | };
|
346 | }
|
347 | }
|
348 |
|
349 | table(src) {
|
350 | const cap = this.rules.block.table.exec(src);
|
351 | if (cap) {
|
352 | const item = {
|
353 | type: 'table',
|
354 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
355 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
356 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
357 | };
|
358 |
|
359 | if (item.header.length === item.align.length) {
|
360 | item.raw = cap[0];
|
361 |
|
362 | let l = item.align.length;
|
363 | let i;
|
364 | for (i = 0; i < l; i++) {
|
365 | if (/^ *-+: *$/.test(item.align[i])) {
|
366 | item.align[i] = 'right';
|
367 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
368 | item.align[i] = 'center';
|
369 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
370 | item.align[i] = 'left';
|
371 | } else {
|
372 | item.align[i] = null;
|
373 | }
|
374 | }
|
375 |
|
376 | l = item.cells.length;
|
377 | for (i = 0; i < l; i++) {
|
378 | item.cells[i] = splitCells(
|
379 | item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
|
380 | item.header.length);
|
381 | }
|
382 |
|
383 | return item;
|
384 | }
|
385 | }
|
386 | }
|
387 |
|
388 | lheading(src) {
|
389 | const cap = this.rules.block.lheading.exec(src);
|
390 | if (cap) {
|
391 | return {
|
392 | type: 'heading',
|
393 | raw: cap[0],
|
394 | depth: cap[2].charAt(0) === '=' ? 1 : 2,
|
395 | text: cap[1]
|
396 | };
|
397 | }
|
398 | }
|
399 |
|
400 | paragraph(src) {
|
401 | const cap = this.rules.block.paragraph.exec(src);
|
402 | if (cap) {
|
403 | return {
|
404 | type: 'paragraph',
|
405 | raw: cap[0],
|
406 | text: cap[1].charAt(cap[1].length - 1) === '\n'
|
407 | ? cap[1].slice(0, -1)
|
408 | : cap[1]
|
409 | };
|
410 | }
|
411 | }
|
412 |
|
413 | text(src, tokens) {
|
414 | const cap = this.rules.block.text.exec(src);
|
415 | if (cap) {
|
416 | const lastToken = tokens[tokens.length - 1];
|
417 | if (lastToken && lastToken.type === 'text') {
|
418 | return {
|
419 | raw: cap[0],
|
420 | text: cap[0]
|
421 | };
|
422 | }
|
423 |
|
424 | return {
|
425 | type: 'text',
|
426 | raw: cap[0],
|
427 | text: cap[0]
|
428 | };
|
429 | }
|
430 | }
|
431 |
|
432 | escape(src) {
|
433 | const cap = this.rules.inline.escape.exec(src);
|
434 | if (cap) {
|
435 | return {
|
436 | type: 'escape',
|
437 | raw: cap[0],
|
438 | text: escape(cap[1])
|
439 | };
|
440 | }
|
441 | }
|
442 |
|
443 | tag(src, inLink, inRawBlock) {
|
444 | const cap = this.rules.inline.tag.exec(src);
|
445 | if (cap) {
|
446 | if (!inLink && /^<a /i.test(cap[0])) {
|
447 | inLink = true;
|
448 | } else if (inLink && /^<\/a>/i.test(cap[0])) {
|
449 | inLink = false;
|
450 | }
|
451 | if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
452 | inRawBlock = true;
|
453 | } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
454 | inRawBlock = false;
|
455 | }
|
456 |
|
457 | return {
|
458 | type: this.options.sanitize
|
459 | ? 'text'
|
460 | : 'html',
|
461 | raw: cap[0],
|
462 | inLink,
|
463 | inRawBlock,
|
464 | text: this.options.sanitize
|
465 | ? (this.options.sanitizer
|
466 | ? this.options.sanitizer(cap[0])
|
467 | : escape(cap[0]))
|
468 | : cap[0]
|
469 | };
|
470 | }
|
471 | }
|
472 |
|
473 | link(src) {
|
474 | const cap = this.rules.inline.link.exec(src);
|
475 | if (cap) {
|
476 | const trimmedUrl = cap[2].trim();
|
477 | if (!this.options.pedantic && /^</.test(trimmedUrl)) {
|
478 |
|
479 | if (!(/>$/.test(trimmedUrl))) {
|
480 | return;
|
481 | }
|
482 |
|
483 |
|
484 | const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
|
485 | if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
|
486 | return;
|
487 | }
|
488 | } else {
|
489 |
|
490 | const lastParenIndex = findClosingBracket(cap[2], '()');
|
491 | if (lastParenIndex > -1) {
|
492 | const start = cap[0].indexOf('!') === 0 ? 5 : 4;
|
493 | const linkLen = start + cap[1].length + lastParenIndex;
|
494 | cap[2] = cap[2].substring(0, lastParenIndex);
|
495 | cap[0] = cap[0].substring(0, linkLen).trim();
|
496 | cap[3] = '';
|
497 | }
|
498 | }
|
499 | let href = cap[2];
|
500 | let title = '';
|
501 | if (this.options.pedantic) {
|
502 |
|
503 | const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
|
504 |
|
505 | if (link) {
|
506 | href = link[1];
|
507 | title = link[3];
|
508 | }
|
509 | } else {
|
510 | title = cap[3] ? cap[3].slice(1, -1) : '';
|
511 | }
|
512 |
|
513 | href = href.trim();
|
514 | if (/^</.test(href)) {
|
515 | if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
|
516 |
|
517 | href = href.slice(1);
|
518 | } else {
|
519 | href = href.slice(1, -1);
|
520 | }
|
521 | }
|
522 | return outputLink(cap, {
|
523 | href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
|
524 | title: title ? title.replace(this.rules.inline._escapes, '$1') : title
|
525 | }, cap[0]);
|
526 | }
|
527 | }
|
528 |
|
529 | reflink(src, links) {
|
530 | let cap;
|
531 | if ((cap = this.rules.inline.reflink.exec(src))
|
532 | || (cap = this.rules.inline.nolink.exec(src))) {
|
533 | let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
|
534 | link = links[link.toLowerCase()];
|
535 | if (!link || !link.href) {
|
536 | const text = cap[0].charAt(0);
|
537 | return {
|
538 | type: 'text',
|
539 | raw: text,
|
540 | text
|
541 | };
|
542 | }
|
543 | return outputLink(cap, link, cap[0]);
|
544 | }
|
545 | }
|
546 |
|
547 | strong(src, maskedSrc, prevChar = '') {
|
548 | let match = this.rules.inline.strong.start.exec(src);
|
549 |
|
550 | if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
|
551 | maskedSrc = maskedSrc.slice(-1 * src.length);
|
552 | const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;
|
553 |
|
554 | endReg.lastIndex = 0;
|
555 |
|
556 | let cap;
|
557 | while ((match = endReg.exec(maskedSrc)) != null) {
|
558 | cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
|
559 | if (cap) {
|
560 | return {
|
561 | type: 'strong',
|
562 | raw: src.slice(0, cap[0].length),
|
563 | text: src.slice(2, cap[0].length - 2)
|
564 | };
|
565 | }
|
566 | }
|
567 | }
|
568 | }
|
569 |
|
570 | em(src, maskedSrc, prevChar = '') {
|
571 | let match = this.rules.inline.em.start.exec(src);
|
572 |
|
573 | if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
|
574 | maskedSrc = maskedSrc.slice(-1 * src.length);
|
575 | const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;
|
576 |
|
577 | endReg.lastIndex = 0;
|
578 |
|
579 | let cap;
|
580 | while ((match = endReg.exec(maskedSrc)) != null) {
|
581 | cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
|
582 | if (cap) {
|
583 | return {
|
584 | type: 'em',
|
585 | raw: src.slice(0, cap[0].length),
|
586 | text: src.slice(1, cap[0].length - 1)
|
587 | };
|
588 | }
|
589 | }
|
590 | }
|
591 | }
|
592 |
|
593 | codespan(src) {
|
594 | const cap = this.rules.inline.code.exec(src);
|
595 | if (cap) {
|
596 | let text = cap[2].replace(/\n/g, ' ');
|
597 | const hasNonSpaceChars = /[^ ]/.test(text);
|
598 | const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
|
599 | if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
|
600 | text = text.substring(1, text.length - 1);
|
601 | }
|
602 | text = escape(text, true);
|
603 | return {
|
604 | type: 'codespan',
|
605 | raw: cap[0],
|
606 | text
|
607 | };
|
608 | }
|
609 | }
|
610 |
|
611 | br(src) {
|
612 | const cap = this.rules.inline.br.exec(src);
|
613 | if (cap) {
|
614 | return {
|
615 | type: 'br',
|
616 | raw: cap[0]
|
617 | };
|
618 | }
|
619 | }
|
620 |
|
621 | del(src) {
|
622 | const cap = this.rules.inline.del.exec(src);
|
623 | if (cap) {
|
624 | return {
|
625 | type: 'del',
|
626 | raw: cap[0],
|
627 | text: cap[2]
|
628 | };
|
629 | }
|
630 | }
|
631 |
|
632 | autolink(src, mangle) {
|
633 | const cap = this.rules.inline.autolink.exec(src);
|
634 | if (cap) {
|
635 | let text, href;
|
636 | if (cap[2] === '@') {
|
637 | text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
|
638 | href = 'mailto:' + text;
|
639 | } else {
|
640 | text = escape(cap[1]);
|
641 | href = text;
|
642 | }
|
643 |
|
644 | return {
|
645 | type: 'link',
|
646 | raw: cap[0],
|
647 | text,
|
648 | href,
|
649 | tokens: [
|
650 | {
|
651 | type: 'text',
|
652 | raw: text,
|
653 | text
|
654 | }
|
655 | ]
|
656 | };
|
657 | }
|
658 | }
|
659 |
|
660 | url(src, mangle) {
|
661 | let cap;
|
662 | if (cap = this.rules.inline.url.exec(src)) {
|
663 | let text, href;
|
664 | if (cap[2] === '@') {
|
665 | text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
|
666 | href = 'mailto:' + text;
|
667 | } else {
|
668 |
|
669 | let prevCapZero;
|
670 | do {
|
671 | prevCapZero = cap[0];
|
672 | cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
|
673 | } while (prevCapZero !== cap[0]);
|
674 | text = escape(cap[0]);
|
675 | if (cap[1] === 'www.') {
|
676 | href = 'http://' + text;
|
677 | } else {
|
678 | href = text;
|
679 | }
|
680 | }
|
681 | return {
|
682 | type: 'link',
|
683 | raw: cap[0],
|
684 | text,
|
685 | href,
|
686 | tokens: [
|
687 | {
|
688 | type: 'text',
|
689 | raw: text,
|
690 | text
|
691 | }
|
692 | ]
|
693 | };
|
694 | }
|
695 | }
|
696 |
|
697 | inlineText(src, inRawBlock, smartypants) {
|
698 | const cap = this.rules.inline.text.exec(src);
|
699 | if (cap) {
|
700 | let text;
|
701 | if (inRawBlock) {
|
702 | text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
|
703 | } else {
|
704 | text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
|
705 | }
|
706 | return {
|
707 | type: 'text',
|
708 | raw: cap[0],
|
709 | text
|
710 | };
|
711 | }
|
712 | }
|
713 | };
|