1 | const { defaults } = require('./defaults.js');
|
2 | const {
|
3 | rtrim,
|
4 | splitCells,
|
5 | escape,
|
6 | findClosingBracket
|
7 | } = require('./helpers.js');
|
8 |
|
/**
 * Build the token for an inline link or image.
 *
 * @param {Array<string>} cap - Match array; the first character of `cap[0]`
 *   selects the token type ('!' means image) and `cap[1]` supplies the text.
 * @param {{href: string, title: ?string}} link - Resolved destination and
 *   optional title.
 * @param {string} raw - Source text stored on the token as `raw`.
 * @returns {Object} A token of type 'image' or 'link'. `title` is passed
 *   through the `escape` helper when truthy and is `null` otherwise; for
 *   images the text is passed through `escape` as well, for links it is
 *   stored as-is.
 */
function outputLink(cap, link, raw) {
  const { href } = link;
  const title = link.title ? escape(link.title) : null;
  const isImage = cap[0].charAt(0) === '!';

  if (isImage) {
    return {
      type: 'image',
      raw,
      text: escape(cap[1]),
      href,
      title
    };
  }

  return {
    type: 'link',
    raw,
    href,
    title,
    text: cap[1]
  };
}
|
31 |
|
/**
 * Remove from the content of a fenced code block the indentation that its
 * opening fence carries.
 *
 * CommonMark: "If the leading code fence is indented N spaces, then up to N
 * spaces of indentation are removed from each line of the content".
 *
 * @param {string} raw - Full source of the fenced block, starting at the
 *   (possibly indented) opening fence.
 * @param {string} text - Content between the fences.
 * @returns {string} `text` with up to N leading whitespace characters removed
 *   from every line, where N is the indentation of the opening fence; `text`
 *   unchanged when the fence is not indented.
 */
function indentCodeCompensation(raw, text) {
  // Both fence styles (backticks and tildes) may be indented.
  const fenceIndentMatch = /^(\s+)(?:```|~~~)/.exec(raw);

  if (fenceIndentMatch === null) {
    return text;
  }

  const fenceIndent = fenceIndentMatch[1].length;

  return text
    .split('\n')
    .map(line => {
      const lineIndentMatch = /^\s+/.exec(line);
      if (lineIndentMatch === null) {
        return line;
      }

      if (lineIndentMatch[0].length >= fenceIndent) {
        return line.slice(fenceIndent);
      }

      // The line is indented less than the fence: "up to N" means whatever
      // indentation it has is removed. Only literal spaces are stripped here
      // so that a leading tab (wider than the fence indent) is never eaten.
      const leadingSpaces = /^ */.exec(line)[0].length;
      return line.slice(leadingSpaces);
    })
    .join('\n');
}
|
59 |
|
60 |
|
61 |
|
62 |
|
63 | module.exports = class Tokenizer {
|
64 | constructor(options) {
|
65 | this.options = options || defaults;
|
66 | }
|
67 |
|
68 | space(src) {
|
69 | const cap = this.rules.block.newline.exec(src);
|
70 | if (cap) {
|
71 | if (cap[0].length > 1) {
|
72 | return {
|
73 | type: 'space',
|
74 | raw: cap[0]
|
75 | };
|
76 | }
|
77 | return { raw: '\n' };
|
78 | }
|
79 | }
|
80 |
|
81 | code(src, tokens) {
|
82 | const cap = this.rules.block.code.exec(src);
|
83 | if (cap) {
|
84 | const lastToken = tokens[tokens.length - 1];
|
85 |
|
86 | if (lastToken && lastToken.type === 'paragraph') {
|
87 | return {
|
88 | raw: cap[0],
|
89 | text: cap[0].trimRight()
|
90 | };
|
91 | }
|
92 |
|
93 | const text = cap[0].replace(/^ {4}/gm, '');
|
94 | return {
|
95 | type: 'code',
|
96 | raw: cap[0],
|
97 | codeBlockStyle: 'indented',
|
98 | text: !this.options.pedantic
|
99 | ? rtrim(text, '\n')
|
100 | : text
|
101 | };
|
102 | }
|
103 | }
|
104 |
|
105 | fences(src) {
|
106 | const cap = this.rules.block.fences.exec(src);
|
107 | if (cap) {
|
108 | const raw = cap[0];
|
109 | const text = indentCodeCompensation(raw, cap[3] || '');
|
110 |
|
111 | return {
|
112 | type: 'code',
|
113 | raw,
|
114 | lang: cap[2] ? cap[2].trim() : cap[2],
|
115 | text
|
116 | };
|
117 | }
|
118 | }
|
119 |
|
120 | heading(src) {
|
121 | const cap = this.rules.block.heading.exec(src);
|
122 | if (cap) {
|
123 | return {
|
124 | type: 'heading',
|
125 | raw: cap[0],
|
126 | depth: cap[1].length,
|
127 | text: cap[2]
|
128 | };
|
129 | }
|
130 | }
|
131 |
|
132 | nptable(src) {
|
133 | const cap = this.rules.block.nptable.exec(src);
|
134 | if (cap) {
|
135 | const item = {
|
136 | type: 'table',
|
137 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
138 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
139 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
|
140 | raw: cap[0]
|
141 | };
|
142 |
|
143 | if (item.header.length === item.align.length) {
|
144 | let l = item.align.length;
|
145 | let i;
|
146 | for (i = 0; i < l; i++) {
|
147 | if (/^ *-+: *$/.test(item.align[i])) {
|
148 | item.align[i] = 'right';
|
149 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
150 | item.align[i] = 'center';
|
151 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
152 | item.align[i] = 'left';
|
153 | } else {
|
154 | item.align[i] = null;
|
155 | }
|
156 | }
|
157 |
|
158 | l = item.cells.length;
|
159 | for (i = 0; i < l; i++) {
|
160 | item.cells[i] = splitCells(item.cells[i], item.header.length);
|
161 | }
|
162 |
|
163 | return item;
|
164 | }
|
165 | }
|
166 | }
|
167 |
|
168 | hr(src) {
|
169 | const cap = this.rules.block.hr.exec(src);
|
170 | if (cap) {
|
171 | return {
|
172 | type: 'hr',
|
173 | raw: cap[0]
|
174 | };
|
175 | }
|
176 | }
|
177 |
|
178 | blockquote(src) {
|
179 | const cap = this.rules.block.blockquote.exec(src);
|
180 | if (cap) {
|
181 | const text = cap[0].replace(/^ *> ?/gm, '');
|
182 |
|
183 | return {
|
184 | type: 'blockquote',
|
185 | raw: cap[0],
|
186 | text
|
187 | };
|
188 | }
|
189 | }
|
190 |
|
191 | list(src) {
|
192 | const cap = this.rules.block.list.exec(src);
|
193 | if (cap) {
|
194 | let raw = cap[0];
|
195 | const bull = cap[2];
|
196 | const isordered = bull.length > 1;
|
197 |
|
198 | const list = {
|
199 | type: 'list',
|
200 | raw,
|
201 | ordered: isordered,
|
202 | start: isordered ? +bull : '',
|
203 | loose: false,
|
204 | items: []
|
205 | };
|
206 |
|
207 |
|
208 | const itemMatch = cap[0].match(this.rules.block.item);
|
209 |
|
210 | let next = false,
|
211 | item,
|
212 | space,
|
213 | b,
|
214 | addBack,
|
215 | loose,
|
216 | istask,
|
217 | ischecked;
|
218 |
|
219 | const l = itemMatch.length;
|
220 | for (let i = 0; i < l; i++) {
|
221 | item = itemMatch[i];
|
222 | raw = item;
|
223 |
|
224 |
|
225 |
|
226 | space = item.length;
|
227 | item = item.replace(/^ *([*+-]|\d+\.) */, '');
|
228 |
|
229 |
|
230 |
|
231 | if (~item.indexOf('\n ')) {
|
232 | space -= item.length;
|
233 | item = !this.options.pedantic
|
234 | ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
|
235 | : item.replace(/^ {1,4}/gm, '');
|
236 | }
|
237 |
|
238 |
|
239 |
|
240 | if (i !== l - 1) {
|
241 | b = this.rules.block.bullet.exec(itemMatch[i + 1])[0];
|
242 | if (bull.length > 1 ? b.length === 1
|
243 | : (b.length > 1 || (this.options.smartLists && b !== bull))) {
|
244 | addBack = itemMatch.slice(i + 1).join('\n');
|
245 | list.raw = list.raw.substring(0, list.raw.length - addBack.length);
|
246 | i = l - 1;
|
247 | }
|
248 | }
|
249 |
|
250 |
|
251 |
|
252 |
|
253 | loose = next || /\n\n(?!\s*$)/.test(item);
|
254 | if (i !== l - 1) {
|
255 | next = item.charAt(item.length - 1) === '\n';
|
256 | if (!loose) loose = next;
|
257 | }
|
258 |
|
259 | if (loose) {
|
260 | list.loose = true;
|
261 | }
|
262 |
|
263 |
|
264 | istask = /^\[[ xX]\] /.test(item);
|
265 | ischecked = undefined;
|
266 | if (istask) {
|
267 | ischecked = item[1] !== ' ';
|
268 | item = item.replace(/^\[[ xX]\] +/, '');
|
269 | }
|
270 |
|
271 | list.items.push({
|
272 | type: 'list_item',
|
273 | raw,
|
274 | task: istask,
|
275 | checked: ischecked,
|
276 | loose: loose,
|
277 | text: item
|
278 | });
|
279 | }
|
280 |
|
281 | return list;
|
282 | }
|
283 | }
|
284 |
|
285 | html(src) {
|
286 | const cap = this.rules.block.html.exec(src);
|
287 | if (cap) {
|
288 | return {
|
289 | type: this.options.sanitize
|
290 | ? 'paragraph'
|
291 | : 'html',
|
292 | raw: cap[0],
|
293 | pre: !this.options.sanitizer
|
294 | && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
|
295 | text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
|
296 | };
|
297 | }
|
298 | }
|
299 |
|
300 | def(src) {
|
301 | const cap = this.rules.block.def.exec(src);
|
302 | if (cap) {
|
303 | if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
|
304 | const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
305 | return {
|
306 | tag,
|
307 | raw: cap[0],
|
308 | href: cap[2],
|
309 | title: cap[3]
|
310 | };
|
311 | }
|
312 | }
|
313 |
|
314 | table(src) {
|
315 | const cap = this.rules.block.table.exec(src);
|
316 | if (cap) {
|
317 | const item = {
|
318 | type: 'table',
|
319 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
320 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
321 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
322 | };
|
323 |
|
324 | if (item.header.length === item.align.length) {
|
325 | item.raw = cap[0];
|
326 |
|
327 | let l = item.align.length;
|
328 | let i;
|
329 | for (i = 0; i < l; i++) {
|
330 | if (/^ *-+: *$/.test(item.align[i])) {
|
331 | item.align[i] = 'right';
|
332 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
333 | item.align[i] = 'center';
|
334 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
335 | item.align[i] = 'left';
|
336 | } else {
|
337 | item.align[i] = null;
|
338 | }
|
339 | }
|
340 |
|
341 | l = item.cells.length;
|
342 | for (i = 0; i < l; i++) {
|
343 | item.cells[i] = splitCells(
|
344 | item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
|
345 | item.header.length);
|
346 | }
|
347 |
|
348 | return item;
|
349 | }
|
350 | }
|
351 | }
|
352 |
|
353 | lheading(src) {
|
354 | const cap = this.rules.block.lheading.exec(src);
|
355 | if (cap) {
|
356 | return {
|
357 | type: 'heading',
|
358 | raw: cap[0],
|
359 | depth: cap[2].charAt(0) === '=' ? 1 : 2,
|
360 | text: cap[1]
|
361 | };
|
362 | }
|
363 | }
|
364 |
|
365 | paragraph(src) {
|
366 | const cap = this.rules.block.paragraph.exec(src);
|
367 | if (cap) {
|
368 | return {
|
369 | type: 'paragraph',
|
370 | raw: cap[0],
|
371 | text: cap[1].charAt(cap[1].length - 1) === '\n'
|
372 | ? cap[1].slice(0, -1)
|
373 | : cap[1]
|
374 | };
|
375 | }
|
376 | }
|
377 |
|
378 | text(src, tokens) {
|
379 | const cap = this.rules.block.text.exec(src);
|
380 | if (cap) {
|
381 | const lastToken = tokens[tokens.length - 1];
|
382 | if (lastToken && lastToken.type === 'text') {
|
383 | return {
|
384 | raw: cap[0],
|
385 | text: cap[0]
|
386 | };
|
387 | }
|
388 |
|
389 | return {
|
390 | type: 'text',
|
391 | raw: cap[0],
|
392 | text: cap[0]
|
393 | };
|
394 | }
|
395 | }
|
396 |
|
397 | escape(src) {
|
398 | const cap = this.rules.inline.escape.exec(src);
|
399 | if (cap) {
|
400 | return {
|
401 | type: 'escape',
|
402 | raw: cap[0],
|
403 | text: escape(cap[1])
|
404 | };
|
405 | }
|
406 | }
|
407 |
|
408 | tag(src, inLink, inRawBlock) {
|
409 | const cap = this.rules.inline.tag.exec(src);
|
410 | if (cap) {
|
411 | if (!inLink && /^<a /i.test(cap[0])) {
|
412 | inLink = true;
|
413 | } else if (inLink && /^<\/a>/i.test(cap[0])) {
|
414 | inLink = false;
|
415 | }
|
416 | if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
417 | inRawBlock = true;
|
418 | } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
419 | inRawBlock = false;
|
420 | }
|
421 |
|
422 | return {
|
423 | type: this.options.sanitize
|
424 | ? 'text'
|
425 | : 'html',
|
426 | raw: cap[0],
|
427 | inLink,
|
428 | inRawBlock,
|
429 | text: this.options.sanitize
|
430 | ? (this.options.sanitizer
|
431 | ? this.options.sanitizer(cap[0])
|
432 | : escape(cap[0]))
|
433 | : cap[0]
|
434 | };
|
435 | }
|
436 | }
|
437 |
|
438 | link(src) {
|
439 | const cap = this.rules.inline.link.exec(src);
|
440 | if (cap) {
|
441 | const lastParenIndex = findClosingBracket(cap[2], '()');
|
442 | if (lastParenIndex > -1) {
|
443 | const start = cap[0].indexOf('!') === 0 ? 5 : 4;
|
444 | const linkLen = start + cap[1].length + lastParenIndex;
|
445 | cap[2] = cap[2].substring(0, lastParenIndex);
|
446 | cap[0] = cap[0].substring(0, linkLen).trim();
|
447 | cap[3] = '';
|
448 | }
|
449 | let href = cap[2];
|
450 | let title = '';
|
451 | if (this.options.pedantic) {
|
452 | const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
|
453 |
|
454 | if (link) {
|
455 | href = link[1];
|
456 | title = link[3];
|
457 | } else {
|
458 | title = '';
|
459 | }
|
460 | } else {
|
461 | title = cap[3] ? cap[3].slice(1, -1) : '';
|
462 | }
|
463 | href = href.trim().replace(/^<([\s\S]*)>$/, '$1');
|
464 | const token = outputLink(cap, {
|
465 | href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
|
466 | title: title ? title.replace(this.rules.inline._escapes, '$1') : title
|
467 | }, cap[0]);
|
468 | return token;
|
469 | }
|
470 | }
|
471 |
|
472 | reflink(src, links) {
|
473 | let cap;
|
474 | if ((cap = this.rules.inline.reflink.exec(src))
|
475 | || (cap = this.rules.inline.nolink.exec(src))) {
|
476 | let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
|
477 | link = links[link.toLowerCase()];
|
478 | if (!link || !link.href) {
|
479 | const text = cap[0].charAt(0);
|
480 | return {
|
481 | type: 'text',
|
482 | raw: text,
|
483 | text
|
484 | };
|
485 | }
|
486 | const token = outputLink(cap, link, cap[0]);
|
487 | return token;
|
488 | }
|
489 | }
|
490 |
|
491 | strong(src) {
|
492 | const cap = this.rules.inline.strong.exec(src);
|
493 | if (cap) {
|
494 | return {
|
495 | type: 'strong',
|
496 | raw: cap[0],
|
497 | text: cap[4] || cap[3] || cap[2] || cap[1]
|
498 | };
|
499 | }
|
500 | }
|
501 |
|
502 | em(src) {
|
503 | const cap = this.rules.inline.em.exec(src);
|
504 | if (cap) {
|
505 | return {
|
506 | type: 'em',
|
507 | raw: cap[0],
|
508 | text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
|
509 | };
|
510 | }
|
511 | }
|
512 |
|
513 | codespan(src) {
|
514 | const cap = this.rules.inline.code.exec(src);
|
515 | if (cap) {
|
516 | let text = cap[2].replace(/\n/g, ' ');
|
517 | const hasNonSpaceChars = /[^ ]/.test(text);
|
518 | const hasSpaceCharsOnBothEnds = text.startsWith(' ') && text.endsWith(' ');
|
519 | if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
|
520 | text = text.substring(1, text.length - 1);
|
521 | }
|
522 | text = escape(text, true);
|
523 | return {
|
524 | type: 'codespan',
|
525 | raw: cap[0],
|
526 | text
|
527 | };
|
528 | }
|
529 | }
|
530 |
|
531 | br(src) {
|
532 | const cap = this.rules.inline.br.exec(src);
|
533 | if (cap) {
|
534 | return {
|
535 | type: 'br',
|
536 | raw: cap[0]
|
537 | };
|
538 | }
|
539 | }
|
540 |
|
541 | del(src) {
|
542 | const cap = this.rules.inline.del.exec(src);
|
543 | if (cap) {
|
544 | return {
|
545 | type: 'del',
|
546 | raw: cap[0],
|
547 | text: cap[1]
|
548 | };
|
549 | }
|
550 | }
|
551 |
|
552 | autolink(src, mangle) {
|
553 | const cap = this.rules.inline.autolink.exec(src);
|
554 | if (cap) {
|
555 | let text, href;
|
556 | if (cap[2] === '@') {
|
557 | text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
|
558 | href = 'mailto:' + text;
|
559 | } else {
|
560 | text = escape(cap[1]);
|
561 | href = text;
|
562 | }
|
563 |
|
564 | return {
|
565 | type: 'link',
|
566 | raw: cap[0],
|
567 | text,
|
568 | href,
|
569 | tokens: [
|
570 | {
|
571 | type: 'text',
|
572 | raw: text,
|
573 | text
|
574 | }
|
575 | ]
|
576 | };
|
577 | }
|
578 | }
|
579 |
|
580 | url(src, mangle) {
|
581 | let cap;
|
582 | if (cap = this.rules.inline.url.exec(src)) {
|
583 | let text, href;
|
584 | if (cap[2] === '@') {
|
585 | text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
|
586 | href = 'mailto:' + text;
|
587 | } else {
|
588 |
|
589 | let prevCapZero;
|
590 | do {
|
591 | prevCapZero = cap[0];
|
592 | cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
|
593 | } while (prevCapZero !== cap[0]);
|
594 | text = escape(cap[0]);
|
595 | if (cap[1] === 'www.') {
|
596 | href = 'http://' + text;
|
597 | } else {
|
598 | href = text;
|
599 | }
|
600 | }
|
601 | return {
|
602 | type: 'link',
|
603 | raw: cap[0],
|
604 | text,
|
605 | href,
|
606 | tokens: [
|
607 | {
|
608 | type: 'text',
|
609 | raw: text,
|
610 | text
|
611 | }
|
612 | ]
|
613 | };
|
614 | }
|
615 | }
|
616 |
|
617 | inlineText(src, inRawBlock, smartypants) {
|
618 | const cap = this.rules.inline.text.exec(src);
|
619 | if (cap) {
|
620 | let text;
|
621 | if (inRawBlock) {
|
622 | text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
|
623 | } else {
|
624 | text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
|
625 | }
|
626 | return {
|
627 | type: 'text',
|
628 | raw: cap[0],
|
629 | text
|
630 | };
|
631 | }
|
632 | }
|
633 | };
|