// Source: marked — Lexer.js (mirrored from unpkg; 11.5 kB, JavaScript, raw view)
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4const { repeatString } = require('./helpers.js');
5
/**
 * smartypants text replacement
 *
 * Applies "smart" typographic substitutions to plain text: em/en
 * dashes, curly single/double quotes, and ellipses. The pairs are
 * applied strictly in order — em-dashes before en-dashes, opening
 * quotes before the catch-all closing quotes — so the order of the
 * table below is significant.
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];
  return substitutions.reduce(
    (out, [pattern, replacement]) => out.replace(pattern, replacement),
    text
  );
}
26
/**
 * mangle email addresses
 *
 * Obfuscates text (used for autolinked emails) by encoding every
 * UTF-16 code unit as an HTML numeric character reference, randomly
 * switching each one between decimal (`&#97;`) and hexadecimal
 * (`&#x61;`) form. Output is therefore nondeterministic by design.
 */
function mangle(text) {
  return text
    .split('') // iterate UTF-16 units, matching charCodeAt semantics
    .map((unit) => {
      const code = unit.charCodeAt(0);
      const encoded = Math.random() > 0.5 ? 'x' + code.toString(16) : code;
      return '&#' + encoded + ';';
    })
    .join('');
}
46
47/**
48 * Block Lexer
49 */
/**
 * Block Lexer
 *
 * Turns a markdown source string into a token tree. Block-level
 * tokenization runs first (`blockTokens`), then inline tokens are
 * attached to each block token (`inline` / `inlineTokens`). All
 * pattern matching is delegated to the configured Tokenizer; this
 * class only drives the match order and assembles the results.
 */
module.exports = class Lexer {
  /**
   * @param {object} [options] - marked options; falls back to `defaults`.
   *   May carry a custom `tokenizer`; otherwise a fresh Tokenizer is built.
   */
  constructor(options) {
    // `tokens` is an array with a `links` dictionary hung off it,
    // used later to resolve reference-style links ([text][tag]).
    this.tokens = [];
    this.tokens.links = Object.create(null); // prototype-less map: safe for arbitrary tags
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;

    // Pick the rule set for the configured flavor. `pedantic` wins
    // over `gfm`; `breaks` only matters in gfm mode.
    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules (static accessor for the raw block/inline rule tables)
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method — one-shot convenience: build a Lexer and lex `src`.
   * @param {string} src - markdown source
   * @param {object} [options]
   * @returns {Array} token array (with `.links` map attached)
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method — tokenize `src` as inline content only.
   * @param {string} src - markdown source
   * @param {object} [options]
   * @returns {Array} inline token array
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Preprocessing
   *
   * Normalizes line endings to '\n' and expands tabs to 4 spaces,
   * then runs block tokenization followed by the inline pass.
   * @param {string} src
   * @returns {Array} this.tokens
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n')
      .replace(/\t/g, '    ');

    this.blockTokens(src, this.tokens, true);

    this.inline(this.tokens);

    return this.tokens;
  }

  /**
   * Lexing
   *
   * Consumes `src` from the front, trying each block tokenizer in
   * priority order; the first match wins, its `raw` text is sliced
   * off, and the loop restarts. The order of the `if` chain below IS
   * the grammar precedence — do not reorder.
   *
   * @param {string} src - remaining markdown source
   * @param {Array} [tokens] - output array, mutated in place
   * @param {boolean} [top] - true at the document level; gates `def`
   *   and `paragraph`, which are only recognized at top level
   * @returns {Array} tokens
   * @throws {Error} on a character no rule can consume (unless `silent`)
   */
  blockTokens(src, tokens = [], top = true) {
    // Blank out whitespace-only lines so they read as empty lines.
    src = src.replace(/^ +$/gm, '');
    let token, i, l, lastToken;

    while (src) {
      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        // A space token without a `type` is consumed silently.
        if (token.type) {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src, tokens)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        } else {
          // No `type` means the tokenizer flagged this as a
          // continuation of the previous token: merge raw/text
          // into it instead of pushing a new token.
          lastToken = tokens[tokens.length - 1];
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // table no leading pipe (gfm)
      if (token = this.tokenizer.nptable(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote — recurse on the quoted body, keeping `top` status
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        token.tokens = this.blockTokens(token.text, [], top);
        tokens.push(token);
        continue;
      }

      // list — each item body is re-lexed as non-top-level blocks
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        l = token.items.length;
        for (i = 0; i < l; i++) {
          token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
        }
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def — link definitions are recorded on `this.tokens.links`
      // rather than pushed as tokens; the FIRST definition of a tag
      // wins, later duplicates are ignored. Top level only.
      if (top && (token = this.tokenizer.def(src))) {
        src = src.substring(token.raw.length);
        if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      if (top && (token = this.tokenizer.paragraph(src))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text — fallback; like `code`, a missing `type` means
      // "append to the previous token" rather than "new token".
      if (token = this.tokenizer.text(src, tokens)) {
        src = src.substring(token.raw.length);
        if (token.type) {
          tokens.push(token);
        } else {
          lastToken = tokens[tokens.length - 1];
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
        }
        continue;
      }

      // Nothing matched and src is non-empty: without this guard the
      // while-loop would spin forever on the unconsumable character.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }

  /**
   * Inline pass: walk the block tokens and attach inline token arrays
   * to every token type that carries inline text. Container types
   * (blockquote, list) recurse; unknown types are left untouched.
   * @param {Array} tokens - block tokens, mutated in place
   * @returns {Array} tokens
   */
  inline(tokens) {
    let i,
      j,
      k,
      l2,
      row,
      token;

    const l = tokens.length;
    for (i = 0; i < l; i++) {
      token = tokens[i];
      switch (token.type) {
        case 'paragraph':
        case 'text':
        case 'heading': {
          token.tokens = [];
          this.inlineTokens(token.text, token.tokens);
          break;
        }
        case 'table': {
          // Tables get a structured `tokens` object: one inline array
          // per header cell and per body cell.
          token.tokens = {
            header: [],
            cells: []
          };

          // header
          l2 = token.header.length;
          for (j = 0; j < l2; j++) {
            token.tokens.header[j] = [];
            this.inlineTokens(token.header[j], token.tokens.header[j]);
          }

          // cells
          l2 = token.cells.length;
          for (j = 0; j < l2; j++) {
            row = token.cells[j];
            token.tokens.cells[j] = [];
            for (k = 0; k < row.length; k++) {
              token.tokens.cells[j][k] = [];
              this.inlineTokens(row[k], token.tokens.cells[j][k]);
            }
          }

          break;
        }
        case 'blockquote': {
          this.inline(token.tokens);
          break;
        }
        case 'list': {
          l2 = token.items.length;
          for (j = 0; j < l2; j++) {
            this.inline(token.items[j].tokens);
          }
          break;
        }
        default: {
          // do nothing
        }
      }
    }

    return tokens;
  }

  /**
   * Lexing/Compiling
   *
   * Tokenizes inline markdown. Like `blockTokens`, consumes `src`
   * front-to-back with a first-match-wins rule chain whose order is
   * the grammar precedence.
   *
   * @param {string} src - inline source
   * @param {Array} [tokens] - output array, mutated in place
   * @param {boolean} [inLink] - inside a link (suppresses nested links/urls)
   * @param {boolean} [inRawBlock] - inside a raw HTML block (pre/script/style)
   * @param {string} [prevChar] - last character of the previous text token,
   *   fed to the em/strong tokenizers for delimiter-run rules
   * @returns {Array} tokens
   * @throws {Error} on a character no rule can consume (unless `silent`)
   */
  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') {
    let token;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;

    // Mask out reflinks: every known [tag] reference is overwritten
    // with a same-length '[aaa…]' placeholder ('['+fill+']' keeps the
    // overall length identical so indices into `src` stay valid).
    // NOTE: reflinkSearch is a stateful /g regex; its lastIndex drives
    // the scan and is reused for the replacement slice below.
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks (same length-preserving placeholder trick).
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    while (src) {
      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag — may toggle the inLink/inRawBlock state for later rules
      if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
        src = src.substring(token.raw.length);
        inLink = token.inLink;
        inRawBlock = token.inRawBlock;
        tokens.push(token);
        continue;
      }

      // link — recurse on the label with inLink=true so nested links
      // and bare urls are not re-tokenized inside it
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // reflink, nolink — resolved against this.tokens.links;
      // unresolved references come back as plain text tokens
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        if (token.type === 'link') {
          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
        }
        tokens.push(token);
        continue;
      }

      // strong — gets maskedSrc so masked links can't supply delimiters
      if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // em — same masking rationale as strong
      if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
        tokens.push(token);
        continue;
      }

      // autolink — email addresses go through `mangle` for obfuscation
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm) — bare urls are suppressed inside an explicit link
      if (!inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text — fallback; remembers the last char for the next
      // em/strong delimiter-run check
      if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
        src = src.substring(token.raw.length);
        prevChar = token.raw.slice(-1);
        tokens.push(token);
        continue;
      }

      // Guard against an unconsumable character (see blockTokens).
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
};