const Tokenizer = require('./Tokenizer.js');
const { defaults } = require('./defaults.js');
const { block, inline } = require('./rules.js');
const { repeatString } = require('./helpers.js');

/**
 * smartypants text replacement
 */
function smartypants(text) {
  return text
    // em-dashes
    .replace(/---/g, '\u2014')
    // en-dashes
    .replace(/--/g, '\u2013')
    // opening singles
    .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018')
    // closing singles & apostrophes
    .replace(/'/g, '\u2019')
    // opening doubles
    .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c')
    // closing doubles
    .replace(/"/g, '\u201d')
    // ellipses
    .replace(/\.{3}/g, '\u2026');
}
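
// Illustrative sketch, not part of the module: the chained replacements above
// turn ASCII punctuation into its typographic Unicode counterpart, e.g.
//
//   smartypants('"Hello" -- it\'s fine...');
//   // => '\u201cHello\u201d \u2013 it\u2019s fine\u2026'
//   //    (curly double quotes, en-dash, apostrophe, ellipsis)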

/**
 * mangle email addresses
 */
function mangle(text) {
  let out = '',
    i,
    ch;

  const l = text.length;
  for (i = 0; i < l; i++) {
    ch = text.charCodeAt(i);
    if (Math.random() > 0.5) {
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
}
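
// Illustrative sketch, not part of the module: every character becomes a
// decimal or hexadecimal HTML entity, chosen at random per character, so the
// output differs between runs but always decodes to the same text, e.g.
//
//   mangle('a@b');
//   // => '&#97;&#x40;&#98;'   (one possible result)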

/**
 * Block Lexer
 */
module.exports = class Lexer {
  constructor(options) {
    this.tokens = [];
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;

    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }
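
  // Rule selection above, in order of precedence: `pedantic` wins over `gfm`,
  // and `breaks` only matters when `gfm` is set. Sketch (option names as used
  // in this constructor):
  //
  //   new Lexer({ ...defaults, gfm: true, breaks: true });
  //   // tokenizer.rules = { block: block.gfm, inline: inline.breaks }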

  /**
   * Expose Rules
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }
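
  // Usage sketch (illustrative): both static helpers build a throwaway Lexer.
  //
  //   const Lexer = require('./Lexer.js');
  //   Lexer.lex('# Title\n\nSome *text*.');
  //   // => array of block tokens (heading, paragraph, ...) with inline child
  //   //    tokens filled in and a `links` map attached to the array
  //
  //   Lexer.lexInline('Some *text*.');
  //   // => inline tokens only; no block-level pass is run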

  /**
   * Preprocessing
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n')
      .replace(/\t/g, '    ');

    this.blockTokens(src, this.tokens, true);

    this.inline(this.tokens);

    return this.tokens;
  }
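
  // Preprocessing sketch (illustrative): line endings are normalised and tabs
  // expanded to four spaces before any tokenizer sees the source, e.g.
  //
  //   'a\r\nb\tc'  ->  'a\nb    c'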

  /**
   * Lexing
   */
  blockTokens(src, tokens = [], top = true) {
    if (this.options.pedantic) {
      src = src.replace(/^ +$/gm, '');
    }
    let token, i, l, lastToken, cutSrc, lastParagraphClipped;

    while (src) {
      if (this.options.extensions
        && this.options.extensions.block
        && this.options.extensions.block.some((extTokenizer) => {
          if (token = extTokenizer.call(this, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }
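
      // Sketch of the shape a block-level extension tokenizer needs, inferred
      // from how it is invoked above (hypothetical example, not part of marked):
      // it receives the remaining source, is bound to this Lexer, and returns a
      // token whose `raw` covers exactly the text it consumed.
      //
      //   function calloutTokenizer(src, tokens) {
      //     const match = /^:::([^\n]*)\n/.exec(src);
      //     if (match) {
      //       return { type: 'callout', raw: match[0], text: match[1].trim() };
      //     }
      //   }
      //   // registered via options.extensions.block = [calloutTokenizer]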

      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // An indented code block cannot interrupt a paragraph.
        if (lastToken && lastToken.type === 'paragraph') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // table no leading pipe (gfm)
      if (token = this.tokenizer.nptable(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        token.tokens = this.blockTokens(token.text, [], top);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        l = token.items.length;
        for (i = 0; i < l; i++) {
          token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
        }
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (top && (token = this.tokenizer.def(src))) {
        src = src.substring(token.raw.length);
        if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }
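
      // Note: a definition produces no token of its own; it only registers an
      // entry on `this.tokens.links`, which inlineTokens later uses to resolve
      // reference links. Sketch (illustrative):
      //
      //   '[marked]: https://example.com "Docs"'
      //   // => this.tokens.links.marked = { href: 'https://example.com', title: 'Docs' }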

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      // prevent paragraph consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        this.options.extensions.startBlock.forEach(function(getStartIndex) {
          tempStart = getStartIndex.call(this, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1];
        if (lastParagraphClipped && lastToken.type === 'paragraph') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        } else {
          tokens.push(token);
        }
        lastParagraphClipped = (cutSrc.length !== src.length);
        src = src.substring(token.raw.length);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
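
  // At this point block tokens are structurally complete, but inline content
  // (emphasis, links, code spans) has not been tokenized yet; lex() calls
  // inline() afterwards to fill each token's inline `tokens`. Sketch
  // (illustrative):
  //
  //   this.blockTokens('Some *text*')
  //   // => [{ type: 'paragraph', raw: 'Some *text*', text: 'Some *text*' }]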

  inline(tokens) {
    let i,
      j,
      k,
      l2,
      row,
      token;

    const l = tokens.length;
    for (i = 0; i < l; i++) {
      token = tokens[i];
      switch (token.type) {
        case 'paragraph':
        case 'text':
        case 'heading': {
          token.tokens = [];
          this.inlineTokens(token.text, token.tokens);
          break;
        }
        case 'table': {
          token.tokens = {
            header: [],
            cells: []
          };

          // header
          l2 = token.header.length;
          for (j = 0; j < l2; j++) {
            token.tokens.header[j] = [];
            this.inlineTokens(token.header[j], token.tokens.header[j]);
          }

          // cells
          l2 = token.cells.length;
          for (j = 0; j < l2; j++) {
            row = token.cells[j];
            token.tokens.cells[j] = [];
            for (k = 0; k < row.length; k++) {
              token.tokens.cells[j][k] = [];
              this.inlineTokens(row[k], token.tokens.cells[j][k]);
            }
          }

          break;
        }
        case 'blockquote': {
          this.inline(token.tokens);
          break;
        }
        case 'list': {
          l2 = token.items.length;
          for (j = 0; j < l2; j++) {
            this.inline(token.items[j].tokens);
          }
          break;
        }
        default: {
          // do nothing
        }
      }
    }

    return tokens;
  }
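
  // Sketch of what inline() adds (illustrative): a paragraph token whose text
  // is 'a *b*' gains token.tokens roughly like
  //
  //   [{ type: 'text', raw: 'a ', text: 'a ' },
  //    { type: 'em', raw: '*b*', text: 'b', tokens: [{ type: 'text', ... }] }]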

  /**
   * Lexing/Compiling
   */
  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
    let token, lastToken, cutSrc;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    let keepPrevChar, prevChar;

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    // Mask out escaped em & strong delimiters
    while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
    }
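
    // Masking sketch (illustrative, and assuming the reflinkSearch, blockSkip
    // and escapedEmSt rules in rules.js match reference links, bracketed/code
    // spans, and escaped `*`/`_` respectively): with a defined link `foo`,
    //
    //   src:       'see [foo] and \*not em\*'
    //   maskedSrc: 'see [aaa] and ++not em++'
    //
    // so emStrong never treats characters inside those spans as delimiters,
    // while offsets into `src` stay aligned because lengths are preserved.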

    while (src) {
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;

      // extensions
      if (this.options.extensions
        && this.options.extensions.inline
        && this.options.extensions.inline.some((extTokenizer) => {
          if (token = extTokenizer.call(this, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
        src = src.substring(token.raw.length);
        inLink = token.inLink;
        inRawBlock = token.inRawBlock;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (token.type === 'link') {
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
          tokens.push(token);
        } else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // em & strong
      if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm)
      if (!inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      // prevent inlineText consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startInline) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        this.options.extensions.startInline.forEach(function(getStartIndex) {
          tempStart = getStartIndex.call(this, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (token = this.tokenizer.inlineText(cutSrc, inRawBlock, smartypants)) {
        src = src.substring(token.raw.length);
        if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
          prevChar = token.raw.slice(-1);
        }
        keepPrevChar = true;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
};