// Source: marked — Lexer.js (11.7 kB JavaScript), raw view captured from UNPKG.
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4const { repeatString } = require('./helpers.js');
5
6/**
7 * smartypants text replacement
8 */
/**
 * smartypants text replacement
 *
 * Converts plain ASCII punctuation into typographic equivalents:
 * em/en dashes, curly single/double quotes, and ellipses.
 * Substitution order matters (e.g. `---` before `--`, opening
 * quotes before the catch-all closing forms).
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  let out = text;
  for (const [pattern, replacement] of substitutions) {
    out = out.replace(pattern, replacement);
  }
  return out;
}
26
27/**
28 * mangle email addresses
29 */
/**
 * mangle email addresses
 *
 * Obfuscates every character as a numeric HTML entity, randomly
 * choosing between decimal (`&#NN;`) and hexadecimal (`&#xNN;`)
 * form for each character. Output is therefore non-deterministic,
 * but always decodes back to the original text.
 */
function mangle(text) {
  let out = '';

  for (let index = 0; index < text.length; index++) {
    let code = text.charCodeAt(index);
    if (Math.random() > 0.5) {
      // switch this character to hex entity form
      code = 'x' + code.toString(16);
    }
    out += '&#' + code + ';';
  }

  return out;
}
46
47/**
48 * Block Lexer
49 */
/**
 * Block Lexer
 *
 * Walks a markdown source string and builds a token tree (`this.tokens`)
 * by delegating to a Tokenizer instance. Block-level structure is built
 * first (blockTokens), then inline markup is expanded inside each token
 * (inline / inlineTokens). Reference-link definitions collected during
 * the block pass are stored on `this.tokens.links`.
 */
module.exports = class Lexer {
  /**
   * @param {object} [options] lexer/tokenizer options; falls back to `defaults`.
   *   May carry a caller-supplied `tokenizer`.
   */
  constructor(options) {
    this.tokens = [];
    // prototype-less map: reference-link definitions keyed by tag
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;

    // Select the active rule set. `pedantic` wins over `gfm`;
    // `gfm` + `breaks` swaps in the newline-sensitive inline rules.
    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules
   * Static access to the raw block/inline rule tables.
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   * Convenience wrapper: `new Lexer(options).lex(src)`.
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method
   * Tokenizes inline markup only — no block-level pass.
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Preprocessing
   * Normalizes line endings to '\n' and expands tabs, then runs the
   * block pass followed by the inline pass over the resulting tokens.
   * @returns {Array} the populated token list (with `.links`).
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n')
      // NOTE(review): upstream marked expands tabs to four spaces; this
      // single-space replacement may be a paste artifact — confirm.
      .replace(/\t/g, ' ');

    this.blockTokens(src, this.tokens, true);

    this.inline(this.tokens);

    return this.tokens;
  }

  /**
   * Lexing
   * Consumes `src` from the front, trying each block tokenizer in
   * priority order; each match strips `token.raw` off the source.
   * @param {string} src remaining markdown source.
   * @param {Array} [tokens] accumulator the tokens are pushed onto.
   * @param {boolean} [top] true at document level (and inside
   *   blockquotes); enables `def` and `paragraph` handling.
   * @throws {Error} on a byte no tokenizer consumes (unless `silent`).
   */
  blockTokens(src, tokens = [], top = true) {
    if (this.options.pedantic) {
      // pedantic mode strips whitespace-only lines up front
      src = src.replace(/^ +$/gm, '');
    }
    let token, i, l, lastToken;

    while (src) {
      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src, tokens)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        } else {
          // no type => continuation: fold into the previous token
          lastToken = tokens[tokens.length - 1];
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // table no leading pipe (gfm)
      if (token = this.tokenizer.nptable(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        // recurse on the quote body, preserving top-level semantics
        token.tokens = this.blockTokens(token.text, [], top);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        l = token.items.length;
        for (i = 0; i < l; i++) {
          // list items are never top-level (no defs/paragraph rules)
          token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
        }
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (top && (token = this.tokenizer.def(src))) {
        src = src.substring(token.raw.length);
        // first definition for a tag wins; later duplicates are ignored
        if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      if (top && (token = this.tokenizer.paragraph(src))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src, tokens)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        } else {
          // continuation line: merge into the previous token
          lastToken = tokens[tokens.length - 1];
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        }
        continue;
      }

      // Nothing consumed any input: bail out rather than spin forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }

  /**
   * Second pass: expand inline markup inside every block token,
   * recursing into blockquote/list children and table cells.
   * @param {Array} tokens block tokens produced by blockTokens.
   * @returns {Array} the same array, with `token.tokens` populated.
   */
  inline(tokens) {
    let i,
      j,
      k,
      l2,
      row,
      token;

    const l = tokens.length;
    for (i = 0; i < l; i++) {
      token = tokens[i];
      switch (token.type) {
        case 'paragraph':
        case 'text':
        case 'heading': {
          token.tokens = [];
          this.inlineTokens(token.text, token.tokens);
          break;
        }
        case 'table': {
          // tables keep separate inline-token lists per header/cell
          token.tokens = {
            header: [],
            cells: []
          };

          // header
          l2 = token.header.length;
          for (j = 0; j < l2; j++) {
            token.tokens.header[j] = [];
            this.inlineTokens(token.header[j], token.tokens.header[j]);
          }

          // cells
          l2 = token.cells.length;
          for (j = 0; j < l2; j++) {
            row = token.cells[j];
            token.tokens.cells[j] = [];
            for (k = 0; k < row.length; k++) {
              token.tokens.cells[j][k] = [];
              this.inlineTokens(row[k], token.tokens.cells[j][k]);
            }
          }

          break;
        }
        case 'blockquote': {
          this.inline(token.tokens);
          break;
        }
        case 'list': {
          l2 = token.items.length;
          for (j = 0; j < l2; j++) {
            this.inline(token.items[j].tokens);
          }
          break;
        }
        default: {
          // do nothing
        }
      }
    }

    return tokens;
  }

  /**
   * Lexing/Compiling
   * Tokenizes inline markup. Known reflinks and skip-blocks are masked
   * out of a shadow copy of the source (`maskedSrc`) so they cannot
   * confuse the em/strong delimiter matching.
   * @param {string} src inline source text.
   * @param {Array} [tokens] accumulator for inline tokens.
   * @param {boolean} [inLink] true while inside a link (disables url/link nesting).
   * @param {boolean} [inRawBlock] true while inside a raw HTML block.
   * @throws {Error} on a byte no tokenizer consumes (unless `silent`).
   */
  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
    let token;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    // prevChar feeds the em/strong left-flanking checks; keepPrevChar
    // marks that the text tokenizer just set it for the next iteration.
    let keepPrevChar, prevChar;

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        // reflinkSearch is a /g regex: exec() advances lastIndex each pass
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            // replace the match with a same-length run of 'a' in brackets
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    while (src) {
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;
      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
        src = src.substring(token.raw.length);
        // the tag tokenizer may toggle link / raw-block state
        inLink = token.inLink;
        inRawBlock = token.inRawBlock;
        tokens.push(token);
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          // link text is inline-tokenized with inLink = true
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // strong
      if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // em
      if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm) — bare urls are not auto-linked inside a link
      if (!inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
        src = src.substring(token.raw.length);
        // remember the last char so the next em/strong check can use it
        prevChar = token.raw.slice(-1);
        keepPrevChar = true;
        tokens.push(token);
        continue;
      }

      // Nothing consumed any input: bail out rather than spin forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
};