UNPKG

15 kBJavaScriptView Raw
1const { defaults } = require('./defaults.js');
2const {
3 rtrim,
4 splitCells,
5 escape,
6 findClosingBracket
7} = require('./helpers.js');
8
/**
 * Build a `link` or `image` token from a regex capture and a link target.
 *
 * A capture whose full match (`cap[0]`) begins with `!` yields an image
 * token; anything else yields a link token.
 *
 * @param {Array<string>} cap - Regex capture; `cap[0]` is the full match and
 *   `cap[1]` is the bracketed text.
 * @param {{href: string, title: ?string}} link - Target of the link. A falsy
 *   `title` is reported as `null`; otherwise it is passed through `escape`.
 * @param {string} raw - Source text stored on the token as `raw`.
 * @returns {Object} Token with `type`, `raw`, `href`, `title` and `text`.
 *   Image text is passed through `escape`; link text is stored untouched.
 */
function outputLink(cap, link, raw) {
  const { href } = link;
  const title = link.title ? escape(link.title) : null;
  const isImage = cap[0].charAt(0) === '!';

  if (isImage) {
    return {
      type: 'image',
      raw,
      text: escape(cap[1]),
      href,
      title
    };
  }

  return {
    type: 'link',
    raw,
    href,
    title,
    text: cap[1]
  };
}
31
/**
 * Remove from each line of a fenced code block the indentation that precedes
 * the opening fence.
 *
 * If `raw` does not start with whitespace followed by three backticks, `text`
 * is returned unchanged. Otherwise every line whose leading whitespace is at
 * least as long as the fence's indentation loses that many leading
 * characters; shorter-indented and unindented lines are kept as they are.
 *
 * @param {string} raw - Full source of the fenced block, including the fence.
 * @param {string} text - Body of the code block.
 * @returns {string} The body with the fence indentation stripped.
 */
function indentCodeCompensation(raw, text) {
  const fenceIndent = raw.match(/^(\s+)(?:```)/);
  if (fenceIndent === null) {
    return text;
  }

  const width = fenceIndent[1].length;
  const adjusted = [];

  for (const line of text.split('\n')) {
    const leading = line.match(/^\s+/);
    const canStrip = leading !== null && leading[0].length >= width;
    adjusted.push(canStrip ? line.slice(width) : line);
  }

  return adjusted.join('\n');
}
59
60/**
61 * Tokenizer
62 */
63module.exports = class Tokenizer {
64 constructor(options) {
65 this.options = options || defaults;
66 }
67
68 space(src) {
69 const cap = this.rules.block.newline.exec(src);
70 if (cap) {
71 if (cap[0].length > 1) {
72 return {
73 type: 'space',
74 raw: cap[0]
75 };
76 }
77 return { raw: '\n' };
78 }
79 }
80
81 code(src, tokens) {
82 const cap = this.rules.block.code.exec(src);
83 if (cap) {
84 const lastToken = tokens[tokens.length - 1];
85 // An indented code block cannot interrupt a paragraph.
86 if (lastToken && lastToken.type === 'paragraph') {
87 return {
88 raw: cap[0],
89 text: cap[0].trimRight()
90 };
91 }
92
93 const text = cap[0].replace(/^ {4}/gm, '');
94 return {
95 type: 'code',
96 raw: cap[0],
97 codeBlockStyle: 'indented',
98 text: !this.options.pedantic
99 ? rtrim(text, '\n')
100 : text
101 };
102 }
103 }
104
105 fences(src) {
106 const cap = this.rules.block.fences.exec(src);
107 if (cap) {
108 const raw = cap[0];
109 const text = indentCodeCompensation(raw, cap[3] || '');
110
111 return {
112 type: 'code',
113 raw,
114 lang: cap[2] ? cap[2].trim() : cap[2],
115 text
116 };
117 }
118 }
119
120 heading(src) {
121 const cap = this.rules.block.heading.exec(src);
122 if (cap) {
123 return {
124 type: 'heading',
125 raw: cap[0],
126 depth: cap[1].length,
127 text: cap[2]
128 };
129 }
130 }
131
132 nptable(src) {
133 const cap = this.rules.block.nptable.exec(src);
134 if (cap) {
135 const item = {
136 type: 'table',
137 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
138 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
139 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
140 raw: cap[0]
141 };
142
143 if (item.header.length === item.align.length) {
144 let l = item.align.length;
145 let i;
146 for (i = 0; i < l; i++) {
147 if (/^ *-+: *$/.test(item.align[i])) {
148 item.align[i] = 'right';
149 } else if (/^ *:-+: *$/.test(item.align[i])) {
150 item.align[i] = 'center';
151 } else if (/^ *:-+ *$/.test(item.align[i])) {
152 item.align[i] = 'left';
153 } else {
154 item.align[i] = null;
155 }
156 }
157
158 l = item.cells.length;
159 for (i = 0; i < l; i++) {
160 item.cells[i] = splitCells(item.cells[i], item.header.length);
161 }
162
163 return item;
164 }
165 }
166 }
167
168 hr(src) {
169 const cap = this.rules.block.hr.exec(src);
170 if (cap) {
171 return {
172 type: 'hr',
173 raw: cap[0]
174 };
175 }
176 }
177
178 blockquote(src) {
179 const cap = this.rules.block.blockquote.exec(src);
180 if (cap) {
181 const text = cap[0].replace(/^ *> ?/gm, '');
182
183 return {
184 type: 'blockquote',
185 raw: cap[0],
186 text
187 };
188 }
189 }
190
191 list(src) {
192 const cap = this.rules.block.list.exec(src);
193 if (cap) {
194 let raw = cap[0];
195 const bull = cap[2];
196 const isordered = bull.length > 1;
197
198 const list = {
199 type: 'list',
200 raw,
201 ordered: isordered,
202 start: isordered ? +bull : '',
203 loose: false,
204 items: []
205 };
206
207 // Get each top-level item.
208 const itemMatch = cap[0].match(this.rules.block.item);
209
210 let next = false,
211 item,
212 space,
213 b,
214 addBack,
215 loose,
216 istask,
217 ischecked;
218
219 const l = itemMatch.length;
220 for (let i = 0; i < l; i++) {
221 item = itemMatch[i];
222 raw = item;
223
224 // Remove the list item's bullet
225 // so it is seen as the next token.
226 space = item.length;
227 item = item.replace(/^ *([*+-]|\d+\.) */, '');
228
229 // Outdent whatever the
230 // list item contains. Hacky.
231 if (~item.indexOf('\n ')) {
232 space -= item.length;
233 item = !this.options.pedantic
234 ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
235 : item.replace(/^ {1,4}/gm, '');
236 }
237
238 // Determine whether the next list item belongs here.
239 // Backpedal if it does not belong in this list.
240 if (i !== l - 1) {
241 b = this.rules.block.bullet.exec(itemMatch[i + 1])[0];
242 if (bull.length > 1 ? b.length === 1
243 : (b.length > 1 || (this.options.smartLists && b !== bull))) {
244 addBack = itemMatch.slice(i + 1).join('\n');
245 list.raw = list.raw.substring(0, list.raw.length - addBack.length);
246 i = l - 1;
247 }
248 }
249
250 // Determine whether item is loose or not.
251 // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
252 // for discount behavior.
253 loose = next || /\n\n(?!\s*$)/.test(item);
254 if (i !== l - 1) {
255 next = item.charAt(item.length - 1) === '\n';
256 if (!loose) loose = next;
257 }
258
259 if (loose) {
260 list.loose = true;
261 }
262
263 // Check for task list items
264 istask = /^\[[ xX]\] /.test(item);
265 ischecked = undefined;
266 if (istask) {
267 ischecked = item[1] !== ' ';
268 item = item.replace(/^\[[ xX]\] +/, '');
269 }
270
271 list.items.push({
272 type: 'list_item',
273 raw,
274 task: istask,
275 checked: ischecked,
276 loose: loose,
277 text: item
278 });
279 }
280
281 return list;
282 }
283 }
284
285 html(src) {
286 const cap = this.rules.block.html.exec(src);
287 if (cap) {
288 return {
289 type: this.options.sanitize
290 ? 'paragraph'
291 : 'html',
292 raw: cap[0],
293 pre: !this.options.sanitizer
294 && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
295 text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
296 };
297 }
298 }
299
300 def(src) {
301 const cap = this.rules.block.def.exec(src);
302 if (cap) {
303 if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
304 const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
305 return {
306 tag,
307 raw: cap[0],
308 href: cap[2],
309 title: cap[3]
310 };
311 }
312 }
313
314 table(src) {
315 const cap = this.rules.block.table.exec(src);
316 if (cap) {
317 const item = {
318 type: 'table',
319 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
320 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
321 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
322 };
323
324 if (item.header.length === item.align.length) {
325 item.raw = cap[0];
326
327 let l = item.align.length;
328 let i;
329 for (i = 0; i < l; i++) {
330 if (/^ *-+: *$/.test(item.align[i])) {
331 item.align[i] = 'right';
332 } else if (/^ *:-+: *$/.test(item.align[i])) {
333 item.align[i] = 'center';
334 } else if (/^ *:-+ *$/.test(item.align[i])) {
335 item.align[i] = 'left';
336 } else {
337 item.align[i] = null;
338 }
339 }
340
341 l = item.cells.length;
342 for (i = 0; i < l; i++) {
343 item.cells[i] = splitCells(
344 item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
345 item.header.length);
346 }
347
348 return item;
349 }
350 }
351 }
352
353 lheading(src) {
354 const cap = this.rules.block.lheading.exec(src);
355 if (cap) {
356 return {
357 type: 'heading',
358 raw: cap[0],
359 depth: cap[2].charAt(0) === '=' ? 1 : 2,
360 text: cap[1]
361 };
362 }
363 }
364
365 paragraph(src) {
366 const cap = this.rules.block.paragraph.exec(src);
367 if (cap) {
368 return {
369 type: 'paragraph',
370 raw: cap[0],
371 text: cap[1].charAt(cap[1].length - 1) === '\n'
372 ? cap[1].slice(0, -1)
373 : cap[1]
374 };
375 }
376 }
377
378 text(src, tokens) {
379 const cap = this.rules.block.text.exec(src);
380 if (cap) {
381 const lastToken = tokens[tokens.length - 1];
382 if (lastToken && lastToken.type === 'text') {
383 return {
384 raw: cap[0],
385 text: cap[0]
386 };
387 }
388
389 return {
390 type: 'text',
391 raw: cap[0],
392 text: cap[0]
393 };
394 }
395 }
396
397 escape(src) {
398 const cap = this.rules.inline.escape.exec(src);
399 if (cap) {
400 return {
401 type: 'escape',
402 raw: cap[0],
403 text: escape(cap[1])
404 };
405 }
406 }
407
408 tag(src, inLink, inRawBlock) {
409 const cap = this.rules.inline.tag.exec(src);
410 if (cap) {
411 if (!inLink && /^<a /i.test(cap[0])) {
412 inLink = true;
413 } else if (inLink && /^<\/a>/i.test(cap[0])) {
414 inLink = false;
415 }
416 if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
417 inRawBlock = true;
418 } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
419 inRawBlock = false;
420 }
421
422 return {
423 type: this.options.sanitize
424 ? 'text'
425 : 'html',
426 raw: cap[0],
427 inLink,
428 inRawBlock,
429 text: this.options.sanitize
430 ? (this.options.sanitizer
431 ? this.options.sanitizer(cap[0])
432 : escape(cap[0]))
433 : cap[0]
434 };
435 }
436 }
437
438 link(src) {
439 const cap = this.rules.inline.link.exec(src);
440 if (cap) {
441 const lastParenIndex = findClosingBracket(cap[2], '()');
442 if (lastParenIndex > -1) {
443 const start = cap[0].indexOf('!') === 0 ? 5 : 4;
444 const linkLen = start + cap[1].length + lastParenIndex;
445 cap[2] = cap[2].substring(0, lastParenIndex);
446 cap[0] = cap[0].substring(0, linkLen).trim();
447 cap[3] = '';
448 }
449 let href = cap[2];
450 let title = '';
451 if (this.options.pedantic) {
452 const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
453
454 if (link) {
455 href = link[1];
456 title = link[3];
457 } else {
458 title = '';
459 }
460 } else {
461 title = cap[3] ? cap[3].slice(1, -1) : '';
462 }
463 href = href.trim().replace(/^<([\s\S]*)>$/, '$1');
464 const token = outputLink(cap, {
465 href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
466 title: title ? title.replace(this.rules.inline._escapes, '$1') : title
467 }, cap[0]);
468 return token;
469 }
470 }
471
472 reflink(src, links) {
473 let cap;
474 if ((cap = this.rules.inline.reflink.exec(src))
475 || (cap = this.rules.inline.nolink.exec(src))) {
476 let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
477 link = links[link.toLowerCase()];
478 if (!link || !link.href) {
479 const text = cap[0].charAt(0);
480 return {
481 type: 'text',
482 raw: text,
483 text
484 };
485 }
486 const token = outputLink(cap, link, cap[0]);
487 return token;
488 }
489 }
490
491 strong(src) {
492 const cap = this.rules.inline.strong.exec(src);
493 if (cap) {
494 return {
495 type: 'strong',
496 raw: cap[0],
497 text: cap[4] || cap[3] || cap[2] || cap[1]
498 };
499 }
500 }
501
502 em(src) {
503 const cap = this.rules.inline.em.exec(src);
504 if (cap) {
505 return {
506 type: 'em',
507 raw: cap[0],
508 text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
509 };
510 }
511 }
512
513 codespan(src) {
514 const cap = this.rules.inline.code.exec(src);
515 if (cap) {
516 let text = cap[2].replace(/\n/g, ' ');
517 const hasNonSpaceChars = /[^ ]/.test(text);
518 const hasSpaceCharsOnBothEnds = text.startsWith(' ') && text.endsWith(' ');
519 if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
520 text = text.substring(1, text.length - 1);
521 }
522 text = escape(text, true);
523 return {
524 type: 'codespan',
525 raw: cap[0],
526 text
527 };
528 }
529 }
530
531 br(src) {
532 const cap = this.rules.inline.br.exec(src);
533 if (cap) {
534 return {
535 type: 'br',
536 raw: cap[0]
537 };
538 }
539 }
540
541 del(src) {
542 const cap = this.rules.inline.del.exec(src);
543 if (cap) {
544 return {
545 type: 'del',
546 raw: cap[0],
547 text: cap[1]
548 };
549 }
550 }
551
552 autolink(src, mangle) {
553 const cap = this.rules.inline.autolink.exec(src);
554 if (cap) {
555 let text, href;
556 if (cap[2] === '@') {
557 text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
558 href = 'mailto:' + text;
559 } else {
560 text = escape(cap[1]);
561 href = text;
562 }
563
564 return {
565 type: 'link',
566 raw: cap[0],
567 text,
568 href,
569 tokens: [
570 {
571 type: 'text',
572 raw: text,
573 text
574 }
575 ]
576 };
577 }
578 }
579
580 url(src, mangle) {
581 let cap;
582 if (cap = this.rules.inline.url.exec(src)) {
583 let text, href;
584 if (cap[2] === '@') {
585 text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
586 href = 'mailto:' + text;
587 } else {
588 // do extended autolink path validation
589 let prevCapZero;
590 do {
591 prevCapZero = cap[0];
592 cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
593 } while (prevCapZero !== cap[0]);
594 text = escape(cap[0]);
595 if (cap[1] === 'www.') {
596 href = 'http://' + text;
597 } else {
598 href = text;
599 }
600 }
601 return {
602 type: 'link',
603 raw: cap[0],
604 text,
605 href,
606 tokens: [
607 {
608 type: 'text',
609 raw: text,
610 text
611 }
612 ]
613 };
614 }
615 }
616
617 inlineText(src, inRawBlock, smartypants) {
618 const cap = this.rules.inline.text.exec(src);
619 if (cap) {
620 let text;
621 if (inRawBlock) {
622 text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
623 } else {
624 text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
625 }
626 return {
627 type: 'text',
628 raw: cap[0],
629 text
630 };
631 }
632 }
633};