UNPKG

13.9 kB · JavaScript · View Raw
1const { defaults } = require('./defaults.js');
2const {
3 rtrim,
4 splitCells,
5 escape,
6 findClosingBracket
7} = require('./helpers.js');
8
/**
 * Build a `link` or `image` token from an inline match.
 *
 * @param {Array<string>} cap - Match array; `cap[0]` is the full match (an
 *   image when it begins with '!') and `cap[1]` is the bracketed text.
 * @param {{href: string, title: ?string}} link - Destination and optional title.
 * @param {string} raw - Source text to record on the token.
 * @returns {Object} An `image` token (text passed through `escape`) or a
 *   `link` token (text left as captured). A falsy title becomes `null`;
 *   otherwise the title is passed through `escape`.
 */
function outputLink(cap, link, raw) {
  const { href } = link;
  const title = link.title ? escape(link.title) : null;
  const isImage = cap[0].charAt(0) === '!';

  if (isImage) {
    return {
      type: 'image',
      raw,
      text: escape(cap[1]),
      href,
      title
    };
  }

  return {
    type: 'link',
    raw,
    href,
    title,
    text: cap[1]
  };
}
31
32/**
33 * Tokenizer
34 */
35module.exports = class Tokenizer {
36 constructor(options) {
37 this.options = options || defaults;
38 }
39
40 space(src) {
41 const cap = this.rules.block.newline.exec(src);
42 if (cap) {
43 if (cap[0].length > 1) {
44 return {
45 type: 'space',
46 raw: cap[0]
47 };
48 }
49 return { raw: '\n' };
50 }
51 }
52
53 code(src, tokens) {
54 const cap = this.rules.block.code.exec(src);
55 if (cap) {
56 const lastToken = tokens[tokens.length - 1];
57 // An indented code block cannot interrupt a paragraph.
58 if (lastToken && lastToken.type === 'paragraph') {
59 tokens.pop();
60 lastToken.text += '\n' + cap[0].trimRight();
61 lastToken.raw += '\n' + cap[0];
62 return lastToken;
63 } else {
64 const text = cap[0].replace(/^ {4}/gm, '');
65 return {
66 type: 'code',
67 raw: cap[0],
68 codeBlockStyle: 'indented',
69 text: !this.options.pedantic
70 ? rtrim(text, '\n')
71 : text
72 };
73 }
74 }
75 }
76
77 fences(src) {
78 const cap = this.rules.block.fences.exec(src);
79 if (cap) {
80 return {
81 type: 'code',
82 raw: cap[0],
83 lang: cap[2] ? cap[2].trim() : cap[2],
84 text: cap[3] || ''
85 };
86 }
87 }
88
89 heading(src) {
90 const cap = this.rules.block.heading.exec(src);
91 if (cap) {
92 return {
93 type: 'heading',
94 raw: cap[0],
95 depth: cap[1].length,
96 text: cap[2]
97 };
98 }
99 }
100
101 nptable(src) {
102 const cap = this.rules.block.nptable.exec(src);
103 if (cap) {
104 const item = {
105 type: 'table',
106 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
107 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
108 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
109 raw: cap[0]
110 };
111
112 if (item.header.length === item.align.length) {
113 let l = item.align.length;
114 let i;
115 for (i = 0; i < l; i++) {
116 if (/^ *-+: *$/.test(item.align[i])) {
117 item.align[i] = 'right';
118 } else if (/^ *:-+: *$/.test(item.align[i])) {
119 item.align[i] = 'center';
120 } else if (/^ *:-+ *$/.test(item.align[i])) {
121 item.align[i] = 'left';
122 } else {
123 item.align[i] = null;
124 }
125 }
126
127 l = item.cells.length;
128 for (i = 0; i < l; i++) {
129 item.cells[i] = splitCells(item.cells[i], item.header.length);
130 }
131
132 return item;
133 }
134 }
135 }
136
137 hr(src) {
138 const cap = this.rules.block.hr.exec(src);
139 if (cap) {
140 return {
141 type: 'hr',
142 raw: cap[0]
143 };
144 }
145 }
146
147 blockquote(src) {
148 const cap = this.rules.block.blockquote.exec(src);
149 if (cap) {
150 const text = cap[0].replace(/^ *> ?/gm, '');
151
152 return {
153 type: 'blockquote',
154 raw: cap[0],
155 text
156 };
157 }
158 }
159
160 list(src) {
161 const cap = this.rules.block.list.exec(src);
162 if (cap) {
163 let raw = cap[0];
164 const bull = cap[2];
165 const isordered = bull.length > 1;
166
167 const list = {
168 type: 'list',
169 raw,
170 ordered: isordered,
171 start: isordered ? +bull : '',
172 loose: false,
173 items: []
174 };
175
176 // Get each top-level item.
177 const itemMatch = cap[0].match(this.rules.block.item);
178
179 let next = false,
180 item,
181 space,
182 b,
183 addBack,
184 loose,
185 istask,
186 ischecked;
187
188 const l = itemMatch.length;
189 for (let i = 0; i < l; i++) {
190 item = itemMatch[i];
191 raw = item;
192
193 // Remove the list item's bullet
194 // so it is seen as the next token.
195 space = item.length;
196 item = item.replace(/^ *([*+-]|\d+\.) */, '');
197
198 // Outdent whatever the
199 // list item contains. Hacky.
200 if (~item.indexOf('\n ')) {
201 space -= item.length;
202 item = !this.options.pedantic
203 ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
204 : item.replace(/^ {1,4}/gm, '');
205 }
206
207 // Determine whether the next list item belongs here.
208 // Backpedal if it does not belong in this list.
209 if (i !== l - 1) {
210 b = this.rules.block.bullet.exec(itemMatch[i + 1])[0];
211 if (bull.length > 1 ? b.length === 1
212 : (b.length > 1 || (this.options.smartLists && b !== bull))) {
213 addBack = itemMatch.slice(i + 1).join('\n');
214 list.raw = list.raw.substring(0, list.raw.length - addBack.length);
215 i = l - 1;
216 }
217 }
218
219 // Determine whether item is loose or not.
220 // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
221 // for discount behavior.
222 loose = next || /\n\n(?!\s*$)/.test(item);
223 if (i !== l - 1) {
224 next = item.charAt(item.length - 1) === '\n';
225 if (!loose) loose = next;
226 }
227
228 if (loose) {
229 list.loose = true;
230 }
231
232 // Check for task list items
233 istask = /^\[[ xX]\] /.test(item);
234 ischecked = undefined;
235 if (istask) {
236 ischecked = item[1] !== ' ';
237 item = item.replace(/^\[[ xX]\] +/, '');
238 }
239
240 list.items.push({
241 raw,
242 task: istask,
243 checked: ischecked,
244 loose: loose,
245 text: item
246 });
247 }
248
249 return list;
250 }
251 }
252
253 html(src) {
254 const cap = this.rules.block.html.exec(src);
255 if (cap) {
256 return {
257 type: this.options.sanitize
258 ? 'paragraph'
259 : 'html',
260 raw: cap[0],
261 pre: !this.options.sanitizer
262 && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
263 text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
264 };
265 }
266 }
267
268 def(src) {
269 const cap = this.rules.block.def.exec(src);
270 if (cap) {
271 if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
272 const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
273 return {
274 tag,
275 raw: cap[0],
276 href: cap[2],
277 title: cap[3]
278 };
279 }
280 }
281
282 table(src) {
283 const cap = this.rules.block.table.exec(src);
284 if (cap) {
285 const item = {
286 type: 'table',
287 header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
288 align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
289 cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
290 };
291
292 if (item.header.length === item.align.length) {
293 item.raw = cap[0];
294
295 let l = item.align.length;
296 let i;
297 for (i = 0; i < l; i++) {
298 if (/^ *-+: *$/.test(item.align[i])) {
299 item.align[i] = 'right';
300 } else if (/^ *:-+: *$/.test(item.align[i])) {
301 item.align[i] = 'center';
302 } else if (/^ *:-+ *$/.test(item.align[i])) {
303 item.align[i] = 'left';
304 } else {
305 item.align[i] = null;
306 }
307 }
308
309 l = item.cells.length;
310 for (i = 0; i < l; i++) {
311 item.cells[i] = splitCells(
312 item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
313 item.header.length);
314 }
315
316 return item;
317 }
318 }
319 }
320
321 lheading(src) {
322 const cap = this.rules.block.lheading.exec(src);
323 if (cap) {
324 return {
325 type: 'heading',
326 raw: cap[0],
327 depth: cap[2].charAt(0) === '=' ? 1 : 2,
328 text: cap[1]
329 };
330 }
331 }
332
333 paragraph(src) {
334 const cap = this.rules.block.paragraph.exec(src);
335 if (cap) {
336 return {
337 type: 'paragraph',
338 raw: cap[0],
339 text: cap[1].charAt(cap[1].length - 1) === '\n'
340 ? cap[1].slice(0, -1)
341 : cap[1]
342 };
343 }
344 }
345
346 text(src) {
347 const cap = this.rules.block.text.exec(src);
348 if (cap) {
349 return {
350 type: 'text',
351 raw: cap[0],
352 text: cap[0]
353 };
354 }
355 }
356
357 escape(src) {
358 const cap = this.rules.inline.escape.exec(src);
359 if (cap) {
360 return {
361 type: 'escape',
362 raw: cap[0],
363 text: escape(cap[1])
364 };
365 }
366 }
367
368 tag(src, inLink, inRawBlock) {
369 const cap = this.rules.inline.tag.exec(src);
370 if (cap) {
371 if (!inLink && /^<a /i.test(cap[0])) {
372 inLink = true;
373 } else if (inLink && /^<\/a>/i.test(cap[0])) {
374 inLink = false;
375 }
376 if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
377 inRawBlock = true;
378 } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
379 inRawBlock = false;
380 }
381
382 return {
383 type: this.options.sanitize
384 ? 'text'
385 : 'html',
386 raw: cap[0],
387 inLink,
388 inRawBlock,
389 text: this.options.sanitize
390 ? (this.options.sanitizer
391 ? this.options.sanitizer(cap[0])
392 : escape(cap[0]))
393 : cap[0]
394 };
395 }
396 }
397
398 link(src) {
399 const cap = this.rules.inline.link.exec(src);
400 if (cap) {
401 const lastParenIndex = findClosingBracket(cap[2], '()');
402 if (lastParenIndex > -1) {
403 const start = cap[0].indexOf('!') === 0 ? 5 : 4;
404 const linkLen = start + cap[1].length + lastParenIndex;
405 cap[2] = cap[2].substring(0, lastParenIndex);
406 cap[0] = cap[0].substring(0, linkLen).trim();
407 cap[3] = '';
408 }
409 let href = cap[2];
410 let title = '';
411 if (this.options.pedantic) {
412 const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
413
414 if (link) {
415 href = link[1];
416 title = link[3];
417 } else {
418 title = '';
419 }
420 } else {
421 title = cap[3] ? cap[3].slice(1, -1) : '';
422 }
423 href = href.trim().replace(/^<([\s\S]*)>$/, '$1');
424 const token = outputLink(cap, {
425 href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
426 title: title ? title.replace(this.rules.inline._escapes, '$1') : title
427 }, cap[0]);
428 return token;
429 }
430 }
431
432 reflink(src, links) {
433 let cap;
434 if ((cap = this.rules.inline.reflink.exec(src))
435 || (cap = this.rules.inline.nolink.exec(src))) {
436 let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
437 link = links[link.toLowerCase()];
438 if (!link || !link.href) {
439 const text = cap[0].charAt(0);
440 return {
441 type: 'text',
442 raw: text,
443 text
444 };
445 }
446 const token = outputLink(cap, link, cap[0]);
447 return token;
448 }
449 }
450
451 strong(src) {
452 const cap = this.rules.inline.strong.exec(src);
453 if (cap) {
454 return {
455 type: 'strong',
456 raw: cap[0],
457 text: cap[4] || cap[3] || cap[2] || cap[1]
458 };
459 }
460 }
461
462 em(src) {
463 const cap = this.rules.inline.em.exec(src);
464 if (cap) {
465 return {
466 type: 'em',
467 raw: cap[0],
468 text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
469 };
470 }
471 }
472
473 codespan(src) {
474 const cap = this.rules.inline.code.exec(src);
475 if (cap) {
476 return {
477 type: 'codespan',
478 raw: cap[0],
479 text: escape(cap[2].trim(), true)
480 };
481 }
482 }
483
484 br(src) {
485 const cap = this.rules.inline.br.exec(src);
486 if (cap) {
487 return {
488 type: 'br',
489 raw: cap[0]
490 };
491 }
492 }
493
494 del(src) {
495 const cap = this.rules.inline.del.exec(src);
496 if (cap) {
497 return {
498 type: 'del',
499 raw: cap[0],
500 text: cap[1]
501 };
502 }
503 }
504
505 autolink(src, mangle) {
506 const cap = this.rules.inline.autolink.exec(src);
507 if (cap) {
508 let text, href;
509 if (cap[2] === '@') {
510 text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
511 href = 'mailto:' + text;
512 } else {
513 text = escape(cap[1]);
514 href = text;
515 }
516
517 return {
518 type: 'link',
519 raw: cap[0],
520 text,
521 href,
522 tokens: [
523 {
524 type: 'text',
525 raw: text,
526 text
527 }
528 ]
529 };
530 }
531 }
532
533 url(src, mangle) {
534 let cap;
535 if (cap = this.rules.inline.url.exec(src)) {
536 let text, href;
537 if (cap[2] === '@') {
538 text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
539 href = 'mailto:' + text;
540 } else {
541 // do extended autolink path validation
542 let prevCapZero;
543 do {
544 prevCapZero = cap[0];
545 cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
546 } while (prevCapZero !== cap[0]);
547 text = escape(cap[0]);
548 if (cap[1] === 'www.') {
549 href = 'http://' + text;
550 } else {
551 href = text;
552 }
553 }
554 return {
555 type: 'link',
556 raw: cap[0],
557 text,
558 href,
559 tokens: [
560 {
561 type: 'text',
562 raw: text,
563 text
564 }
565 ]
566 };
567 }
568 }
569
570 inlineText(src, inRawBlock, smartypants) {
571 const cap = this.rules.inline.text.exec(src);
572 if (cap) {
573 let text;
574 if (inRawBlock) {
575 text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
576 } else {
577 text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
578 }
579 return {
580 type: 'text',
581 raw: cap[0],
582 text
583 };
584 }
585 }
586};