// marked — lib/Lexer.js (retrieved via UNPKG; page header removed)
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4const { repeatString } = require('./helpers.js');
5
/**
 * smartypants text replacement
 *
 * Converts plain ASCII punctuation into typographic equivalents:
 * em/en dashes, curly single and double quotes, and the ellipsis.
 * Rules are applied in order; dash rules run before quote rules, and
 * opening-quote rules before closing-quote rules.
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];
  return substitutions.reduce(
    (out, [pattern, replacement]) => out.replace(pattern, replacement),
    text
  );
}
26
/**
 * mangle email addresses
 *
 * Obfuscates text by encoding every UTF-16 code unit as an HTML
 * character reference, randomly choosing decimal or hexadecimal form
 * per character (one Math.random() call per character).
 */
function mangle(text) {
  const encoded = [];
  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i);
    // Coin flip: hex entity ('&#x61;') vs decimal entity ('&#97;').
    const useHex = Math.random() > 0.5;
    encoded.push('&#' + (useHex ? 'x' + code.toString(16) : code) + ';');
  }
  return encoded.join('');
}
46
/**
 * Block Lexer
 *
 * Drives tokenization of a markdown string. The Lexer owns rule
 * selection (normal / pedantic / gfm / breaks), the output token array,
 * and the shared link-definition map; per-pattern matching is delegated
 * to the Tokenizer. lex() runs a block-level pass (blockTokens)
 * followed by an inline pass (inline / inlineTokens).
 */
module.exports = class Lexer {
  /**
   * @param {object} [options] - marked options; the module defaults are
   *   used when omitted. May carry a custom `tokenizer`.
   */
  constructor(options) {
    this.tokens = [];
    // Link reference definitions ([tag]: href "title"), keyed by tag.
    // Object.create(null) yields a prototype-less map, so tags such as
    // "constructor" cannot collide with Object.prototype members.
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;

    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    // Rule selection: pedantic takes precedence over gfm; the `breaks`
    // option only swaps the inline rule set, and only in gfm mode.
    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules
   * Static accessor for the raw block/inline rule tables.
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   * One-shot convenience: build a Lexer and fully tokenize `src`.
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method
   * One-shot convenience: tokenize `src` with inline rules only
   * (no block-level pass).
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Preprocessing
   * Normalizes CRLF/CR line endings to LF and expands tabs to spaces,
   * then runs the block pass followed by the inline pass.
   * @param {string} src - markdown source
   * @returns {Array} the token array (with the `links` map attached)
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n')
      .replace(/\t/g, '    ');

    this.blockTokens(src, this.tokens, true);

    // Second pass: expand inline markdown inside the block tokens.
    this.inline(this.tokens);

    return this.tokens;
  }

  /**
   * Lexing
   * Consumes `src` from the front, trying each block-level tokenizer
   * rule in priority order; each match pushes a token (or merges into
   * the previous one) and advances past the matched raw text.
   * @param {string} src - remaining markdown source
   * @param {Array} tokens - output array (mutated and returned)
   * @param {boolean} top - true at the top level; gates the def and
   *   paragraph rules, and is forwarded into blockquote bodies
   */
  blockTokens(src, tokens = [], top = true) {
    // Blank out lines consisting solely of spaces so the rules see
    // them as empty lines.
    src = src.replace(/^ +$/gm, '');
    let token, i, l, lastToken;

    while (src) {
      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        // A type-less token is consumed silently (pure separator).
        if (token.type) {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src, tokens)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        } else {
          // Type-less result: fold raw/text into the previous token
          // (indented code continuing an earlier token).
          lastToken = tokens[tokens.length - 1];
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // table no leading pipe (gfm)
      if (token = this.tokenizer.nptable(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        // Blockquote bodies are re-lexed as blocks, preserving `top`.
        token.tokens = this.blockTokens(token.text, [], top);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        l = token.items.length;
        // Each list item is re-lexed as a nested (non-top) block scope.
        for (i = 0; i < l; i++) {
          token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
        }
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (top && (token = this.tokenizer.def(src))) {
        src = src.substring(token.raw.length);
        // Defs emit no token; they are recorded in the shared links
        // map. The first definition of a tag wins; duplicates are
        // silently ignored.
        if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      if (top && (token = this.tokenizer.paragraph(src))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src, tokens)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        } else {
          // Type-less result: continuation line of the previous token.
          lastToken = tokens[tokens.length - 1];
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        }
        continue;
      }

      // No rule consumed anything: bail out instead of spinning
      // forever on an unmatchable byte.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }

  /**
   * Walks block tokens and attaches inline tokens to every token kind
   * that carries inline markdown (paragraph/text/heading and table
   * cells), recursing into container tokens (blockquote, list items).
   * @param {Array} tokens - block tokens produced by blockTokens
   * @returns {Array} the same array, with `tokens` fields populated
   */
  inline(tokens) {
    let i,
      j,
      k,
      l2,
      row,
      token;

    const l = tokens.length;
    for (i = 0; i < l; i++) {
      token = tokens[i];
      switch (token.type) {
        case 'paragraph':
        case 'text':
        case 'heading': {
          token.tokens = [];
          this.inlineTokens(token.text, token.tokens);
          break;
        }
        case 'table': {
          // Tables get a structured tokens object mirroring the
          // header/cells layout, one inline token list per cell.
          token.tokens = {
            header: [],
            cells: []
          };

          // header
          l2 = token.header.length;
          for (j = 0; j < l2; j++) {
            token.tokens.header[j] = [];
            this.inlineTokens(token.header[j], token.tokens.header[j]);
          }

          // cells
          l2 = token.cells.length;
          for (j = 0; j < l2; j++) {
            row = token.cells[j];
            token.tokens.cells[j] = [];
            for (k = 0; k < row.length; k++) {
              token.tokens.cells[j][k] = [];
              this.inlineTokens(row[k], token.tokens.cells[j][k]);
            }
          }

          break;
        }
        case 'blockquote': {
          // Recurse into the blockquote's nested block tokens.
          this.inline(token.tokens);
          break;
        }
        case 'list': {
          // Recurse into each list item's nested block tokens.
          l2 = token.items.length;
          for (j = 0; j < l2; j++) {
            this.inline(token.items[j].tokens);
          }
          break;
        }
        default: {
          // do nothing
        }
      }
    }

    return tokens;
  }

  /**
   * Lexing/Compiling
   * Tokenizes a string with the inline rules, trying each rule in
   * priority order and consuming from the front of `src`.
   * @param {string} src - inline markdown source
   * @param {Array} tokens - output array (mutated and returned)
   * @param {boolean} inLink - true while inside a link (suppresses
   *   nested links / gfm autolinked urls)
   * @param {boolean} inRawBlock - true while inside a raw html block
   */
  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
    let token;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    // prevChar is the last character of the preceding inline-text
    // token; em/strong delimiter checks need it. keepPrevChar marks
    // whether the previous iteration produced a text token.
    let keepPrevChar, prevChar;

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        // Stateful /g exec loop: reflinkSearch.lastIndex advances
        // through maskedSrc across iterations.
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          // Only mask reflinks whose tag is actually defined.
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            // Replacement '[aaa…]' has the same length as the match so
            // maskedSrc indices stay aligned with src.
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    while (src) {
      // Reset prevChar unless the previous iteration was inline text.
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;
      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
        src = src.substring(token.raw.length);
        // The tag rule can toggle the link / raw-html state.
        inLink = token.inLink;
        inRawBlock = token.inRawBlock;
        tokens.push(token);
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          // Link text is tokenized with inLink = true so nested links
          // are suppressed.
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // strong
      if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // em
      if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm) — bare urls are not auto-linked inside a link
      if (!inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
        src = src.substring(token.raw.length);
        // Remember the last character for the next iteration's
        // em/strong delimiter checks.
        prevChar = token.raw.slice(-1);
        keepPrevChar = true;
        tokens.push(token);
        continue;
      }

      // No rule consumed anything: bail out instead of spinning
      // forever on an unmatchable byte.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
};