1 | const { defaults } = require('./defaults.js');
|
2 | const {
|
3 | rtrim,
|
4 | splitCells,
|
5 | escape,
|
6 | findClosingBracket
|
7 | } = require('./helpers.js');
|
8 |
|
/**
 * Build the token for an inline link or image.
 *
 * The first character of the whole match decides the kind: a leading `!`
 * produces an `image` token, anything else a `link` token.
 *
 * @param {Array} cap - Regex match; `cap[0]` is the full match and `cap[1]`
 *   the bracketed text.
 * @param {{href: string, title: ?string}} link - Resolved destination.
 * @param {string} raw - Source text to record on the token.
 * @returns {Object} A `link` token (text left untouched) or an `image`
 *   token (text passed through `escape`). A falsy title becomes `null`.
 */
function outputLink(cap, link, raw) {
  const href = link.href;
  const title = link.title ? escape(link.title) : null;
  const isImage = cap[0].charAt(0) === '!';

  if (isImage) {
    return {
      type: 'image',
      raw,
      text: escape(cap[1]),
      href,
      title
    };
  }

  return {
    type: 'link',
    raw,
    href,
    title,
    text: cap[1]
  };
}
|
31 |
|
32 |
|
33 |
|
34 |
|
35 | module.exports = class Tokenizer {
|
36 | constructor(options) {
|
37 | this.options = options || defaults;
|
38 | }
|
39 |
|
40 | space(src) {
|
41 | const cap = this.rules.block.newline.exec(src);
|
42 | if (cap) {
|
43 | if (cap[0].length > 1) {
|
44 | return {
|
45 | type: 'space',
|
46 | raw: cap[0]
|
47 | };
|
48 | }
|
49 | return { raw: '\n' };
|
50 | }
|
51 | }
|
52 |
|
53 | code(src, tokens) {
|
54 | const cap = this.rules.block.code.exec(src);
|
55 | if (cap) {
|
56 | const lastToken = tokens[tokens.length - 1];
|
57 |
|
58 | if (lastToken && lastToken.type === 'paragraph') {
|
59 | tokens.pop();
|
60 | lastToken.text += '\n' + cap[0].trimRight();
|
61 | lastToken.raw += '\n' + cap[0];
|
62 | return lastToken;
|
63 | } else {
|
64 | const text = cap[0].replace(/^ {4}/gm, '');
|
65 | return {
|
66 | type: 'code',
|
67 | raw: cap[0],
|
68 | codeBlockStyle: 'indented',
|
69 | text: !this.options.pedantic
|
70 | ? rtrim(text, '\n')
|
71 | : text
|
72 | };
|
73 | }
|
74 | }
|
75 | }
|
76 |
|
77 | fences(src) {
|
78 | const cap = this.rules.block.fences.exec(src);
|
79 | if (cap) {
|
80 | return {
|
81 | type: 'code',
|
82 | raw: cap[0],
|
83 | lang: cap[2] ? cap[2].trim() : cap[2],
|
84 | text: cap[3] || ''
|
85 | };
|
86 | }
|
87 | }
|
88 |
|
89 | heading(src) {
|
90 | const cap = this.rules.block.heading.exec(src);
|
91 | if (cap) {
|
92 | return {
|
93 | type: 'heading',
|
94 | raw: cap[0],
|
95 | depth: cap[1].length,
|
96 | text: cap[2]
|
97 | };
|
98 | }
|
99 | }
|
100 |
|
101 | nptable(src) {
|
102 | const cap = this.rules.block.nptable.exec(src);
|
103 | if (cap) {
|
104 | const item = {
|
105 | type: 'table',
|
106 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
107 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
108 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
|
109 | raw: cap[0]
|
110 | };
|
111 |
|
112 | if (item.header.length === item.align.length) {
|
113 | let l = item.align.length;
|
114 | let i;
|
115 | for (i = 0; i < l; i++) {
|
116 | if (/^ *-+: *$/.test(item.align[i])) {
|
117 | item.align[i] = 'right';
|
118 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
119 | item.align[i] = 'center';
|
120 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
121 | item.align[i] = 'left';
|
122 | } else {
|
123 | item.align[i] = null;
|
124 | }
|
125 | }
|
126 |
|
127 | l = item.cells.length;
|
128 | for (i = 0; i < l; i++) {
|
129 | item.cells[i] = splitCells(item.cells[i], item.header.length);
|
130 | }
|
131 |
|
132 | return item;
|
133 | }
|
134 | }
|
135 | }
|
136 |
|
137 | hr(src) {
|
138 | const cap = this.rules.block.hr.exec(src);
|
139 | if (cap) {
|
140 | return {
|
141 | type: 'hr',
|
142 | raw: cap[0]
|
143 | };
|
144 | }
|
145 | }
|
146 |
|
147 | blockquote(src) {
|
148 | const cap = this.rules.block.blockquote.exec(src);
|
149 | if (cap) {
|
150 | const text = cap[0].replace(/^ *> ?/gm, '');
|
151 |
|
152 | return {
|
153 | type: 'blockquote',
|
154 | raw: cap[0],
|
155 | text
|
156 | };
|
157 | }
|
158 | }
|
159 |
|
160 | list(src) {
|
161 | const cap = this.rules.block.list.exec(src);
|
162 | if (cap) {
|
163 | let raw = cap[0];
|
164 | const bull = cap[2];
|
165 | const isordered = bull.length > 1;
|
166 |
|
167 | const list = {
|
168 | type: 'list',
|
169 | raw,
|
170 | ordered: isordered,
|
171 | start: isordered ? +bull : '',
|
172 | loose: false,
|
173 | items: []
|
174 | };
|
175 |
|
176 |
|
177 | const itemMatch = cap[0].match(this.rules.block.item);
|
178 |
|
179 | let next = false,
|
180 | item,
|
181 | space,
|
182 | b,
|
183 | addBack,
|
184 | loose,
|
185 | istask,
|
186 | ischecked;
|
187 |
|
188 | const l = itemMatch.length;
|
189 | for (let i = 0; i < l; i++) {
|
190 | item = itemMatch[i];
|
191 | raw = item;
|
192 |
|
193 |
|
194 |
|
195 | space = item.length;
|
196 | item = item.replace(/^ *([*+-]|\d+\.) */, '');
|
197 |
|
198 |
|
199 |
|
200 | if (~item.indexOf('\n ')) {
|
201 | space -= item.length;
|
202 | item = !this.options.pedantic
|
203 | ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
|
204 | : item.replace(/^ {1,4}/gm, '');
|
205 | }
|
206 |
|
207 |
|
208 |
|
209 | if (i !== l - 1) {
|
210 | b = this.rules.block.bullet.exec(itemMatch[i + 1])[0];
|
211 | if (bull.length > 1 ? b.length === 1
|
212 | : (b.length > 1 || (this.options.smartLists && b !== bull))) {
|
213 | addBack = itemMatch.slice(i + 1).join('\n');
|
214 | list.raw = list.raw.substring(0, list.raw.length - addBack.length);
|
215 | i = l - 1;
|
216 | }
|
217 | }
|
218 |
|
219 |
|
220 |
|
221 |
|
222 | loose = next || /\n\n(?!\s*$)/.test(item);
|
223 | if (i !== l - 1) {
|
224 | next = item.charAt(item.length - 1) === '\n';
|
225 | if (!loose) loose = next;
|
226 | }
|
227 |
|
228 | if (loose) {
|
229 | list.loose = true;
|
230 | }
|
231 |
|
232 |
|
233 | istask = /^\[[ xX]\] /.test(item);
|
234 | ischecked = undefined;
|
235 | if (istask) {
|
236 | ischecked = item[1] !== ' ';
|
237 | item = item.replace(/^\[[ xX]\] +/, '');
|
238 | }
|
239 |
|
240 | list.items.push({
|
241 | raw,
|
242 | task: istask,
|
243 | checked: ischecked,
|
244 | loose: loose,
|
245 | text: item
|
246 | });
|
247 | }
|
248 |
|
249 | return list;
|
250 | }
|
251 | }
|
252 |
|
253 | html(src) {
|
254 | const cap = this.rules.block.html.exec(src);
|
255 | if (cap) {
|
256 | return {
|
257 | type: this.options.sanitize
|
258 | ? 'paragraph'
|
259 | : 'html',
|
260 | raw: cap[0],
|
261 | pre: !this.options.sanitizer
|
262 | && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
|
263 | text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
|
264 | };
|
265 | }
|
266 | }
|
267 |
|
268 | def(src) {
|
269 | const cap = this.rules.block.def.exec(src);
|
270 | if (cap) {
|
271 | if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
|
272 | const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
273 | return {
|
274 | tag,
|
275 | raw: cap[0],
|
276 | href: cap[2],
|
277 | title: cap[3]
|
278 | };
|
279 | }
|
280 | }
|
281 |
|
282 | table(src) {
|
283 | const cap = this.rules.block.table.exec(src);
|
284 | if (cap) {
|
285 | const item = {
|
286 | type: 'table',
|
287 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
288 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
289 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
290 | };
|
291 |
|
292 | if (item.header.length === item.align.length) {
|
293 | item.raw = cap[0];
|
294 |
|
295 | let l = item.align.length;
|
296 | let i;
|
297 | for (i = 0; i < l; i++) {
|
298 | if (/^ *-+: *$/.test(item.align[i])) {
|
299 | item.align[i] = 'right';
|
300 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
301 | item.align[i] = 'center';
|
302 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
303 | item.align[i] = 'left';
|
304 | } else {
|
305 | item.align[i] = null;
|
306 | }
|
307 | }
|
308 |
|
309 | l = item.cells.length;
|
310 | for (i = 0; i < l; i++) {
|
311 | item.cells[i] = splitCells(
|
312 | item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
|
313 | item.header.length);
|
314 | }
|
315 |
|
316 | return item;
|
317 | }
|
318 | }
|
319 | }
|
320 |
|
321 | lheading(src) {
|
322 | const cap = this.rules.block.lheading.exec(src);
|
323 | if (cap) {
|
324 | return {
|
325 | type: 'heading',
|
326 | raw: cap[0],
|
327 | depth: cap[2].charAt(0) === '=' ? 1 : 2,
|
328 | text: cap[1]
|
329 | };
|
330 | }
|
331 | }
|
332 |
|
333 | paragraph(src) {
|
334 | const cap = this.rules.block.paragraph.exec(src);
|
335 | if (cap) {
|
336 | return {
|
337 | type: 'paragraph',
|
338 | raw: cap[0],
|
339 | text: cap[1].charAt(cap[1].length - 1) === '\n'
|
340 | ? cap[1].slice(0, -1)
|
341 | : cap[1]
|
342 | };
|
343 | }
|
344 | }
|
345 |
|
346 | text(src) {
|
347 | const cap = this.rules.block.text.exec(src);
|
348 | if (cap) {
|
349 | return {
|
350 | type: 'text',
|
351 | raw: cap[0],
|
352 | text: cap[0]
|
353 | };
|
354 | }
|
355 | }
|
356 |
|
357 | escape(src) {
|
358 | const cap = this.rules.inline.escape.exec(src);
|
359 | if (cap) {
|
360 | return {
|
361 | type: 'escape',
|
362 | raw: cap[0],
|
363 | text: escape(cap[1])
|
364 | };
|
365 | }
|
366 | }
|
367 |
|
368 | tag(src, inLink, inRawBlock) {
|
369 | const cap = this.rules.inline.tag.exec(src);
|
370 | if (cap) {
|
371 | if (!inLink && /^<a /i.test(cap[0])) {
|
372 | inLink = true;
|
373 | } else if (inLink && /^<\/a>/i.test(cap[0])) {
|
374 | inLink = false;
|
375 | }
|
376 | if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
377 | inRawBlock = true;
|
378 | } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
379 | inRawBlock = false;
|
380 | }
|
381 |
|
382 | return {
|
383 | type: this.options.sanitize
|
384 | ? 'text'
|
385 | : 'html',
|
386 | raw: cap[0],
|
387 | inLink,
|
388 | inRawBlock,
|
389 | text: this.options.sanitize
|
390 | ? (this.options.sanitizer
|
391 | ? this.options.sanitizer(cap[0])
|
392 | : escape(cap[0]))
|
393 | : cap[0]
|
394 | };
|
395 | }
|
396 | }
|
397 |
|
398 | link(src) {
|
399 | const cap = this.rules.inline.link.exec(src);
|
400 | if (cap) {
|
401 | const lastParenIndex = findClosingBracket(cap[2], '()');
|
402 | if (lastParenIndex > -1) {
|
403 | const start = cap[0].indexOf('!') === 0 ? 5 : 4;
|
404 | const linkLen = start + cap[1].length + lastParenIndex;
|
405 | cap[2] = cap[2].substring(0, lastParenIndex);
|
406 | cap[0] = cap[0].substring(0, linkLen).trim();
|
407 | cap[3] = '';
|
408 | }
|
409 | let href = cap[2];
|
410 | let title = '';
|
411 | if (this.options.pedantic) {
|
412 | const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
|
413 |
|
414 | if (link) {
|
415 | href = link[1];
|
416 | title = link[3];
|
417 | } else {
|
418 | title = '';
|
419 | }
|
420 | } else {
|
421 | title = cap[3] ? cap[3].slice(1, -1) : '';
|
422 | }
|
423 | href = href.trim().replace(/^<([\s\S]*)>$/, '$1');
|
424 | const token = outputLink(cap, {
|
425 | href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
|
426 | title: title ? title.replace(this.rules.inline._escapes, '$1') : title
|
427 | }, cap[0]);
|
428 | return token;
|
429 | }
|
430 | }
|
431 |
|
432 | reflink(src, links) {
|
433 | let cap;
|
434 | if ((cap = this.rules.inline.reflink.exec(src))
|
435 | || (cap = this.rules.inline.nolink.exec(src))) {
|
436 | let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
|
437 | link = links[link.toLowerCase()];
|
438 | if (!link || !link.href) {
|
439 | const text = cap[0].charAt(0);
|
440 | return {
|
441 | type: 'text',
|
442 | raw: text,
|
443 | text
|
444 | };
|
445 | }
|
446 | const token = outputLink(cap, link, cap[0]);
|
447 | return token;
|
448 | }
|
449 | }
|
450 |
|
451 | strong(src) {
|
452 | const cap = this.rules.inline.strong.exec(src);
|
453 | if (cap) {
|
454 | return {
|
455 | type: 'strong',
|
456 | raw: cap[0],
|
457 | text: cap[4] || cap[3] || cap[2] || cap[1]
|
458 | };
|
459 | }
|
460 | }
|
461 |
|
462 | em(src) {
|
463 | const cap = this.rules.inline.em.exec(src);
|
464 | if (cap) {
|
465 | return {
|
466 | type: 'em',
|
467 | raw: cap[0],
|
468 | text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
|
469 | };
|
470 | }
|
471 | }
|
472 |
|
473 | codespan(src) {
|
474 | const cap = this.rules.inline.code.exec(src);
|
475 | if (cap) {
|
476 | return {
|
477 | type: 'codespan',
|
478 | raw: cap[0],
|
479 | text: escape(cap[2].trim(), true)
|
480 | };
|
481 | }
|
482 | }
|
483 |
|
484 | br(src) {
|
485 | const cap = this.rules.inline.br.exec(src);
|
486 | if (cap) {
|
487 | return {
|
488 | type: 'br',
|
489 | raw: cap[0]
|
490 | };
|
491 | }
|
492 | }
|
493 |
|
494 | del(src) {
|
495 | const cap = this.rules.inline.del.exec(src);
|
496 | if (cap) {
|
497 | return {
|
498 | type: 'del',
|
499 | raw: cap[0],
|
500 | text: cap[1]
|
501 | };
|
502 | }
|
503 | }
|
504 |
|
505 | autolink(src, mangle) {
|
506 | const cap = this.rules.inline.autolink.exec(src);
|
507 | if (cap) {
|
508 | let text, href;
|
509 | if (cap[2] === '@') {
|
510 | text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
|
511 | href = 'mailto:' + text;
|
512 | } else {
|
513 | text = escape(cap[1]);
|
514 | href = text;
|
515 | }
|
516 |
|
517 | return {
|
518 | type: 'link',
|
519 | raw: cap[0],
|
520 | text,
|
521 | href,
|
522 | tokens: [
|
523 | {
|
524 | type: 'text',
|
525 | raw: text,
|
526 | text
|
527 | }
|
528 | ]
|
529 | };
|
530 | }
|
531 | }
|
532 |
|
533 | url(src, mangle) {
|
534 | let cap;
|
535 | if (cap = this.rules.inline.url.exec(src)) {
|
536 | let text, href;
|
537 | if (cap[2] === '@') {
|
538 | text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
|
539 | href = 'mailto:' + text;
|
540 | } else {
|
541 |
|
542 | let prevCapZero;
|
543 | do {
|
544 | prevCapZero = cap[0];
|
545 | cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
|
546 | } while (prevCapZero !== cap[0]);
|
547 | text = escape(cap[0]);
|
548 | if (cap[1] === 'www.') {
|
549 | href = 'http://' + text;
|
550 | } else {
|
551 | href = text;
|
552 | }
|
553 | }
|
554 | return {
|
555 | type: 'link',
|
556 | raw: cap[0],
|
557 | text,
|
558 | href,
|
559 | tokens: [
|
560 | {
|
561 | type: 'text',
|
562 | raw: text,
|
563 | text
|
564 | }
|
565 | ]
|
566 | };
|
567 | }
|
568 | }
|
569 |
|
570 | inlineText(src, inRawBlock, smartypants) {
|
571 | const cap = this.rules.inline.text.exec(src);
|
572 | if (cap) {
|
573 | let text;
|
574 | if (inRawBlock) {
|
575 | text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
|
576 | } else {
|
577 | text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
|
578 | }
|
579 | return {
|
580 | type: 'text',
|
581 | raw: cap[0],
|
582 | text
|
583 | };
|
584 | }
|
585 | }
|
586 | };
|