UNPKG

13.7 kBJavaScriptView Raw
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4const { repeatString } = require('./helpers.js');
5
/**
 * smartypants text replacement
 *
 * Converts ASCII punctuation to its typographic equivalent:
 * `---`/`--` to em/en dashes, straight quotes to curly quotes,
 * and `...` to an ellipsis. Replacement order matters: dashes run
 * first (so `---` wins over `--`), and opening quotes are claimed
 * before the leftover quotes become closers/apostrophes.
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];
  return substitutions.reduce(
    (out, [pattern, replacement]) => out.replace(pattern, replacement),
    text
  );
}
26
/**
 * mangle email addresses
 *
 * Obfuscates each UTF-16 code unit of `text` as an HTML numeric
 * character reference, randomly choosing (50/50 per character)
 * between decimal (`&#97;`) and hexadecimal (`&#x61;`) form.
 * Note: iterates by index/charCodeAt, so surrogate pairs are
 * encoded as two separate references (matches original behavior).
 */
function mangle(text) {
  const out = [];

  for (let i = 0; i < text.length; i++) {
    let ch = text.charCodeAt(i);
    if (Math.random() > 0.5) {
      ch = 'x' + ch.toString(16);
    }
    out.push('&#' + ch + ';');
  }

  return out.join('');
}
46
47/**
48 * Block Lexer
49 */
50module.exports = class Lexer {
51 constructor(options) {
52 this.tokens = [];
53 this.tokens.links = Object.create(null);
54 this.options = options || defaults;
55 this.options.tokenizer = this.options.tokenizer || new Tokenizer();
56 this.tokenizer = this.options.tokenizer;
57 this.tokenizer.options = this.options;
58 this.tokenizer.lexer = this;
59 this.inlineQueue = [];
60 this.state = {
61 inLink: false,
62 inRawBlock: false,
63 top: true
64 };
65
66 const rules = {
67 block: block.normal,
68 inline: inline.normal
69 };
70
71 if (this.options.pedantic) {
72 rules.block = block.pedantic;
73 rules.inline = inline.pedantic;
74 } else if (this.options.gfm) {
75 rules.block = block.gfm;
76 if (this.options.breaks) {
77 rules.inline = inline.breaks;
78 } else {
79 rules.inline = inline.gfm;
80 }
81 }
82 this.tokenizer.rules = rules;
83 }
84
85 /**
86 * Expose Rules
87 */
88 static get rules() {
89 return {
90 block,
91 inline
92 };
93 }
94
95 /**
96 * Static Lex Method
97 */
98 static lex(src, options) {
99 const lexer = new Lexer(options);
100 return lexer.lex(src);
101 }
102
103 /**
104 * Static Lex Inline Method
105 */
106 static lexInline(src, options) {
107 const lexer = new Lexer(options);
108 return lexer.inlineTokens(src);
109 }
110
111 /**
112 * Preprocessing
113 */
114 lex(src) {
115 src = src
116 .replace(/\r\n|\r/g, '\n')
117 .replace(/\t/g, ' ');
118
119 this.blockTokens(src, this.tokens);
120
121 let next;
122 while (next = this.inlineQueue.shift()) {
123 this.inlineTokens(next.src, next.tokens);
124 }
125
126 return this.tokens;
127 }
128
129 /**
130 * Lexing
131 */
132 blockTokens(src, tokens = []) {
133 if (this.options.pedantic) {
134 src = src.replace(/^ +$/gm, '');
135 }
136 let token, lastToken, cutSrc, lastParagraphClipped;
137
138 while (src) {
139 if (this.options.extensions
140 && this.options.extensions.block
141 && this.options.extensions.block.some((extTokenizer) => {
142 if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
143 src = src.substring(token.raw.length);
144 tokens.push(token);
145 return true;
146 }
147 return false;
148 })) {
149 continue;
150 }
151
152 // newline
153 if (token = this.tokenizer.space(src)) {
154 src = src.substring(token.raw.length);
155 if (token.type) {
156 tokens.push(token);
157 }
158 continue;
159 }
160
161 // code
162 if (token = this.tokenizer.code(src)) {
163 src = src.substring(token.raw.length);
164 lastToken = tokens[tokens.length - 1];
165 // An indented code block cannot interrupt a paragraph.
166 if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
167 lastToken.raw += '\n' + token.raw;
168 lastToken.text += '\n' + token.text;
169 this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
170 } else {
171 tokens.push(token);
172 }
173 continue;
174 }
175
176 // fences
177 if (token = this.tokenizer.fences(src)) {
178 src = src.substring(token.raw.length);
179 tokens.push(token);
180 continue;
181 }
182
183 // heading
184 if (token = this.tokenizer.heading(src)) {
185 src = src.substring(token.raw.length);
186 tokens.push(token);
187 continue;
188 }
189
190 // hr
191 if (token = this.tokenizer.hr(src)) {
192 src = src.substring(token.raw.length);
193 tokens.push(token);
194 continue;
195 }
196
197 // blockquote
198 if (token = this.tokenizer.blockquote(src)) {
199 src = src.substring(token.raw.length);
200 tokens.push(token);
201 continue;
202 }
203
204 // list
205 if (token = this.tokenizer.list(src)) {
206 src = src.substring(token.raw.length);
207 tokens.push(token);
208 continue;
209 }
210
211 // html
212 if (token = this.tokenizer.html(src)) {
213 src = src.substring(token.raw.length);
214 tokens.push(token);
215 continue;
216 }
217
218 // def
219 if (token = this.tokenizer.def(src)) {
220 src = src.substring(token.raw.length);
221 lastToken = tokens[tokens.length - 1];
222 if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
223 lastToken.raw += '\n' + token.raw;
224 lastToken.text += '\n' + token.raw;
225 this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
226 } else if (!this.tokens.links[token.tag]) {
227 this.tokens.links[token.tag] = {
228 href: token.href,
229 title: token.title
230 };
231 }
232 continue;
233 }
234
235 // table (gfm)
236 if (token = this.tokenizer.table(src)) {
237 src = src.substring(token.raw.length);
238 tokens.push(token);
239 continue;
240 }
241
242 // lheading
243 if (token = this.tokenizer.lheading(src)) {
244 src = src.substring(token.raw.length);
245 tokens.push(token);
246 continue;
247 }
248
249 // top-level paragraph
250 // prevent paragraph consuming extensions by clipping 'src' to extension start
251 cutSrc = src;
252 if (this.options.extensions && this.options.extensions.startBlock) {
253 let startIndex = Infinity;
254 const tempSrc = src.slice(1);
255 let tempStart;
256 this.options.extensions.startBlock.forEach(function(getStartIndex) {
257 tempStart = getStartIndex.call({ lexer: this }, tempSrc);
258 if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
259 });
260 if (startIndex < Infinity && startIndex >= 0) {
261 cutSrc = src.substring(0, startIndex + 1);
262 }
263 }
264 if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
265 lastToken = tokens[tokens.length - 1];
266 if (lastParagraphClipped && lastToken.type === 'paragraph') {
267 lastToken.raw += '\n' + token.raw;
268 lastToken.text += '\n' + token.text;
269 this.inlineQueue.pop();
270 this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
271 } else {
272 tokens.push(token);
273 }
274 lastParagraphClipped = (cutSrc.length !== src.length);
275 src = src.substring(token.raw.length);
276 continue;
277 }
278
279 // text
280 if (token = this.tokenizer.text(src)) {
281 src = src.substring(token.raw.length);
282 lastToken = tokens[tokens.length - 1];
283 if (lastToken && lastToken.type === 'text') {
284 lastToken.raw += '\n' + token.raw;
285 lastToken.text += '\n' + token.text;
286 this.inlineQueue.pop();
287 this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
288 } else {
289 tokens.push(token);
290 }
291 continue;
292 }
293
294 if (src) {
295 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
296 if (this.options.silent) {
297 console.error(errMsg);
298 break;
299 } else {
300 throw new Error(errMsg);
301 }
302 }
303 }
304
305 this.state.top = true;
306 return tokens;
307 }
308
309 inline(src, tokens) {
310 this.inlineQueue.push({ src, tokens });
311 }
312
313 /**
314 * Lexing/Compiling
315 */
316 inlineTokens(src, tokens = []) {
317 let token, lastToken, cutSrc;
318
319 // String with links masked to avoid interference with em and strong
320 let maskedSrc = src;
321 let match;
322 let keepPrevChar, prevChar;
323
324 // Mask out reflinks
325 if (this.tokens.links) {
326 const links = Object.keys(this.tokens.links);
327 if (links.length > 0) {
328 while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
329 if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
330 maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
331 }
332 }
333 }
334 }
335 // Mask out other blocks
336 while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
337 maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
338 }
339
340 // Mask out escaped em & strong delimiters
341 while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
342 maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
343 }
344
345 while (src) {
346 if (!keepPrevChar) {
347 prevChar = '';
348 }
349 keepPrevChar = false;
350
351 // extensions
352 if (this.options.extensions
353 && this.options.extensions.inline
354 && this.options.extensions.inline.some((extTokenizer) => {
355 if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
356 src = src.substring(token.raw.length);
357 tokens.push(token);
358 return true;
359 }
360 return false;
361 })) {
362 continue;
363 }
364
365 // escape
366 if (token = this.tokenizer.escape(src)) {
367 src = src.substring(token.raw.length);
368 tokens.push(token);
369 continue;
370 }
371
372 // tag
373 if (token = this.tokenizer.tag(src)) {
374 src = src.substring(token.raw.length);
375 lastToken = tokens[tokens.length - 1];
376 if (lastToken && token.type === 'text' && lastToken.type === 'text') {
377 lastToken.raw += token.raw;
378 lastToken.text += token.text;
379 } else {
380 tokens.push(token);
381 }
382 continue;
383 }
384
385 // link
386 if (token = this.tokenizer.link(src)) {
387 src = src.substring(token.raw.length);
388 tokens.push(token);
389 continue;
390 }
391
392 // reflink, nolink
393 if (token = this.tokenizer.reflink(src, this.tokens.links)) {
394 src = src.substring(token.raw.length);
395 lastToken = tokens[tokens.length - 1];
396 if (lastToken && token.type === 'text' && lastToken.type === 'text') {
397 lastToken.raw += token.raw;
398 lastToken.text += token.text;
399 } else {
400 tokens.push(token);
401 }
402 continue;
403 }
404
405 // em & strong
406 if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
407 src = src.substring(token.raw.length);
408 tokens.push(token);
409 continue;
410 }
411
412 // code
413 if (token = this.tokenizer.codespan(src)) {
414 src = src.substring(token.raw.length);
415 tokens.push(token);
416 continue;
417 }
418
419 // br
420 if (token = this.tokenizer.br(src)) {
421 src = src.substring(token.raw.length);
422 tokens.push(token);
423 continue;
424 }
425
426 // del (gfm)
427 if (token = this.tokenizer.del(src)) {
428 src = src.substring(token.raw.length);
429 tokens.push(token);
430 continue;
431 }
432
433 // autolink
434 if (token = this.tokenizer.autolink(src, mangle)) {
435 src = src.substring(token.raw.length);
436 tokens.push(token);
437 continue;
438 }
439
440 // url (gfm)
441 if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) {
442 src = src.substring(token.raw.length);
443 tokens.push(token);
444 continue;
445 }
446
447 // text
448 // prevent inlineText consuming extensions by clipping 'src' to extension start
449 cutSrc = src;
450 if (this.options.extensions && this.options.extensions.startInline) {
451 let startIndex = Infinity;
452 const tempSrc = src.slice(1);
453 let tempStart;
454 this.options.extensions.startInline.forEach(function(getStartIndex) {
455 tempStart = getStartIndex.call({ lexer: this }, tempSrc);
456 if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
457 });
458 if (startIndex < Infinity && startIndex >= 0) {
459 cutSrc = src.substring(0, startIndex + 1);
460 }
461 }
462 if (token = this.tokenizer.inlineText(cutSrc, smartypants)) {
463 src = src.substring(token.raw.length);
464 if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
465 prevChar = token.raw.slice(-1);
466 }
467 keepPrevChar = true;
468 lastToken = tokens[tokens.length - 1];
469 if (lastToken && lastToken.type === 'text') {
470 lastToken.raw += token.raw;
471 lastToken.text += token.text;
472 } else {
473 tokens.push(token);
474 }
475 continue;
476 }
477
478 if (src) {
479 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
480 if (this.options.silent) {
481 console.error(errMsg);
482 break;
483 } else {
484 throw new Error(errMsg);
485 }
486 }
487 }
488
489 return tokens;
490 }
491};