UNPKG

12.7 kB · JavaScript · View Raw
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4const { repeatString } = require('./helpers.js');
5
6/**
7 * smartypants text replacement
8 */
9function smartypants(text) {
10 return text
11 // em-dashes
12 .replace(/---/g, '\u2014')
13 // en-dashes
14 .replace(/--/g, '\u2013')
15 // opening singles
16 .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018')
17 // closing singles & apostrophes
18 .replace(/'/g, '\u2019')
19 // opening doubles
20 .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c')
21 // closing doubles
22 .replace(/"/g, '\u201d')
23 // ellipses
24 .replace(/\.{3}/g, '\u2026');
25}
26
27/**
28 * mangle email addresses
29 */
30function mangle(text) {
31 let out = '',
32 i,
33 ch;
34
35 const l = text.length;
36 for (i = 0; i < l; i++) {
37 ch = text.charCodeAt(i);
38 if (Math.random() > 0.5) {
39 ch = 'x' + ch.toString(16);
40 }
41 out += '&#' + ch + ';';
42 }
43
44 return out;
45}
46
47/**
48 * Block Lexer
49 */
50module.exports = class Lexer {
51 constructor(options) {
52 this.tokens = [];
53 this.tokens.links = Object.create(null);
54 this.options = options || defaults;
55 this.options.tokenizer = this.options.tokenizer || new Tokenizer();
56 this.tokenizer = this.options.tokenizer;
57 this.tokenizer.options = this.options;
58
59 const rules = {
60 block: block.normal,
61 inline: inline.normal
62 };
63
64 if (this.options.pedantic) {
65 rules.block = block.pedantic;
66 rules.inline = inline.pedantic;
67 } else if (this.options.gfm) {
68 rules.block = block.gfm;
69 if (this.options.breaks) {
70 rules.inline = inline.breaks;
71 } else {
72 rules.inline = inline.gfm;
73 }
74 }
75 this.tokenizer.rules = rules;
76 }
77
78 /**
79 * Expose Rules
80 */
81 static get rules() {
82 return {
83 block,
84 inline
85 };
86 }
87
88 /**
89 * Static Lex Method
90 */
91 static lex(src, options) {
92 const lexer = new Lexer(options);
93 return lexer.lex(src);
94 }
95
96 /**
97 * Static Lex Inline Method
98 */
99 static lexInline(src, options) {
100 const lexer = new Lexer(options);
101 return lexer.inlineTokens(src);
102 }
103
104 /**
105 * Preprocessing
106 */
107 lex(src) {
108 src = src
109 .replace(/\r\n|\r/g, '\n')
110 .replace(/\t/g, ' ');
111
112 this.blockTokens(src, this.tokens, true);
113
114 this.inline(this.tokens);
115
116 return this.tokens;
117 }
118
119 /**
120 * Lexing
121 */
122 blockTokens(src, tokens = [], top = true) {
123 if (this.options.pedantic) {
124 src = src.replace(/^ +$/gm, '');
125 }
126 let token, i, l, lastToken;
127
128 while (src) {
129 // newline
130 if (token = this.tokenizer.space(src)) {
131 src = src.substring(token.raw.length);
132 if (token.type) {
133 tokens.push(token);
134 }
135 continue;
136 }
137
138 // code
139 if (token = this.tokenizer.code(src)) {
140 src = src.substring(token.raw.length);
141 lastToken = tokens[tokens.length - 1];
142 // An indented code block cannot interrupt a paragraph.
143 if (lastToken && lastToken.type === 'paragraph') {
144 lastToken.raw += '\n' + token.raw;
145 lastToken.text += '\n' + token.text;
146 } else {
147 tokens.push(token);
148 }
149 continue;
150 }
151
152 // fences
153 if (token = this.tokenizer.fences(src)) {
154 src = src.substring(token.raw.length);
155 tokens.push(token);
156 continue;
157 }
158
159 // heading
160 if (token = this.tokenizer.heading(src)) {
161 src = src.substring(token.raw.length);
162 tokens.push(token);
163 continue;
164 }
165
166 // table no leading pipe (gfm)
167 if (token = this.tokenizer.nptable(src)) {
168 src = src.substring(token.raw.length);
169 tokens.push(token);
170 continue;
171 }
172
173 // hr
174 if (token = this.tokenizer.hr(src)) {
175 src = src.substring(token.raw.length);
176 tokens.push(token);
177 continue;
178 }
179
180 // blockquote
181 if (token = this.tokenizer.blockquote(src)) {
182 src = src.substring(token.raw.length);
183 token.tokens = this.blockTokens(token.text, [], top);
184 tokens.push(token);
185 continue;
186 }
187
188 // list
189 if (token = this.tokenizer.list(src)) {
190 src = src.substring(token.raw.length);
191 l = token.items.length;
192 for (i = 0; i < l; i++) {
193 token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
194 }
195 tokens.push(token);
196 continue;
197 }
198
199 // html
200 if (token = this.tokenizer.html(src)) {
201 src = src.substring(token.raw.length);
202 tokens.push(token);
203 continue;
204 }
205
206 // def
207 if (top && (token = this.tokenizer.def(src))) {
208 src = src.substring(token.raw.length);
209 if (!this.tokens.links[token.tag]) {
210 this.tokens.links[token.tag] = {
211 href: token.href,
212 title: token.title
213 };
214 }
215 continue;
216 }
217
218 // table (gfm)
219 if (token = this.tokenizer.table(src)) {
220 src = src.substring(token.raw.length);
221 tokens.push(token);
222 continue;
223 }
224
225 // lheading
226 if (token = this.tokenizer.lheading(src)) {
227 src = src.substring(token.raw.length);
228 tokens.push(token);
229 continue;
230 }
231
232 // top-level paragraph
233 if (top && (token = this.tokenizer.paragraph(src))) {
234 src = src.substring(token.raw.length);
235 tokens.push(token);
236 continue;
237 }
238
239 // text
240 if (token = this.tokenizer.text(src)) {
241 src = src.substring(token.raw.length);
242 lastToken = tokens[tokens.length - 1];
243 if (lastToken && lastToken.type === 'text') {
244 lastToken.raw += '\n' + token.raw;
245 lastToken.text += '\n' + token.text;
246 } else {
247 tokens.push(token);
248 }
249 continue;
250 }
251
252 if (src) {
253 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
254 if (this.options.silent) {
255 console.error(errMsg);
256 break;
257 } else {
258 throw new Error(errMsg);
259 }
260 }
261 }
262
263 return tokens;
264 }
265
266 inline(tokens) {
267 let i,
268 j,
269 k,
270 l2,
271 row,
272 token;
273
274 const l = tokens.length;
275 for (i = 0; i < l; i++) {
276 token = tokens[i];
277 switch (token.type) {
278 case 'paragraph':
279 case 'text':
280 case 'heading': {
281 token.tokens = [];
282 this.inlineTokens(token.text, token.tokens);
283 break;
284 }
285 case 'table': {
286 token.tokens = {
287 header: [],
288 cells: []
289 };
290
291 // header
292 l2 = token.header.length;
293 for (j = 0; j < l2; j++) {
294 token.tokens.header[j] = [];
295 this.inlineTokens(token.header[j], token.tokens.header[j]);
296 }
297
298 // cells
299 l2 = token.cells.length;
300 for (j = 0; j < l2; j++) {
301 row = token.cells[j];
302 token.tokens.cells[j] = [];
303 for (k = 0; k < row.length; k++) {
304 token.tokens.cells[j][k] = [];
305 this.inlineTokens(row[k], token.tokens.cells[j][k]);
306 }
307 }
308
309 break;
310 }
311 case 'blockquote': {
312 this.inline(token.tokens);
313 break;
314 }
315 case 'list': {
316 l2 = token.items.length;
317 for (j = 0; j < l2; j++) {
318 this.inline(token.items[j].tokens);
319 }
320 break;
321 }
322 default: {
323 // do nothing
324 }
325 }
326 }
327
328 return tokens;
329 }
330
331 /**
332 * Lexing/Compiling
333 */
334 inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
335 let token, lastToken;
336
337 // String with links masked to avoid interference with em and strong
338 let maskedSrc = src;
339 let match;
340 let keepPrevChar, prevChar;
341
342 // Mask out reflinks
343 if (this.tokens.links) {
344 const links = Object.keys(this.tokens.links);
345 if (links.length > 0) {
346 while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
347 if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
348 maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
349 }
350 }
351 }
352 }
353 // Mask out other blocks
354 while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
355 maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
356 }
357
358 // Mask out escaped em & strong delimiters
359 while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
360 maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
361 }
362
363 while (src) {
364 if (!keepPrevChar) {
365 prevChar = '';
366 }
367 keepPrevChar = false;
368
369 // escape
370 if (token = this.tokenizer.escape(src)) {
371 src = src.substring(token.raw.length);
372 tokens.push(token);
373 continue;
374 }
375
376 // tag
377 if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
378 src = src.substring(token.raw.length);
379 inLink = token.inLink;
380 inRawBlock = token.inRawBlock;
381 const lastToken = tokens[tokens.length - 1];
382 if (lastToken && token.type === 'text' && lastToken.type === 'text') {
383 lastToken.raw += token.raw;
384 lastToken.text += token.text;
385 } else {
386 tokens.push(token);
387 }
388 continue;
389 }
390
391 // link
392 if (token = this.tokenizer.link(src)) {
393 src = src.substring(token.raw.length);
394 if (token.type === 'link') {
395 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
396 }
397 tokens.push(token);
398 continue;
399 }
400
401 // reflink, nolink
402 if (token = this.tokenizer.reflink(src, this.tokens.links)) {
403 src = src.substring(token.raw.length);
404 const lastToken = tokens[tokens.length - 1];
405 if (token.type === 'link') {
406 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
407 tokens.push(token);
408 } else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
409 lastToken.raw += token.raw;
410 lastToken.text += token.text;
411 } else {
412 tokens.push(token);
413 }
414 continue;
415 }
416
417 // em & strong
418 if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
419 src = src.substring(token.raw.length);
420 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
421 tokens.push(token);
422 continue;
423 }
424
425 // code
426 if (token = this.tokenizer.codespan(src)) {
427 src = src.substring(token.raw.length);
428 tokens.push(token);
429 continue;
430 }
431
432 // br
433 if (token = this.tokenizer.br(src)) {
434 src = src.substring(token.raw.length);
435 tokens.push(token);
436 continue;
437 }
438
439 // del (gfm)
440 if (token = this.tokenizer.del(src)) {
441 src = src.substring(token.raw.length);
442 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
443 tokens.push(token);
444 continue;
445 }
446
447 // autolink
448 if (token = this.tokenizer.autolink(src, mangle)) {
449 src = src.substring(token.raw.length);
450 tokens.push(token);
451 continue;
452 }
453
454 // url (gfm)
455 if (!inLink && (token = this.tokenizer.url(src, mangle))) {
456 src = src.substring(token.raw.length);
457 tokens.push(token);
458 continue;
459 }
460
461 // text
462 if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
463 src = src.substring(token.raw.length);
464 if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
465 prevChar = token.raw.slice(-1);
466 }
467 keepPrevChar = true;
468 lastToken = tokens[tokens.length - 1];
469 if (lastToken && lastToken.type === 'text') {
470 lastToken.raw += token.raw;
471 lastToken.text += token.text;
472 } else {
473 tokens.push(token);
474 }
475 continue;
476 }
477
478 if (src) {
479 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
480 if (this.options.silent) {
481 console.error(errMsg);
482 break;
483 } else {
484 throw new Error(errMsg);
485 }
486 }
487 }
488
489 return tokens;
490 }
491};