// marked — block/inline Lexer module (mirrored from the `marked` package source).
1import { Tokenizer } from './Tokenizer.js';
2import { defaults } from './defaults.js';
3import { block, inline } from './rules.js';
4import { repeatString } from './helpers.js';
5
6/**
7 * smartypants text replacement
8 */
/**
 * smartypants text replacement
 *
 * Swaps plain ASCII punctuation for typographic ("smart") equivalents.
 * Substitution order matters: em-dashes must run before en-dashes, and
 * opening-quote patterns before the catch-all closing-quote patterns.
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];
  return substitutions.reduce(
    (out, [pattern, replacement]) => out.replace(pattern, replacement),
    text
  );
}
26
27/**
28 * mangle email addresses
29 */
/**
 * mangle email addresses
 *
 * Obfuscates a string into HTML character references; roughly half of the
 * characters (chosen via Math.random) are emitted as hexadecimal references
 * ("&#x61;"), the rest as decimal ("&#97;"). Output is non-deterministic by
 * design — this is light scraping deterrence, not security.
 */
function mangle(text) {
  let out = '';
  // split('') iterates UTF-16 code units, matching charCodeAt indexing.
  for (const unit of text.split('')) {
    const code = unit.charCodeAt(0);
    const ref = Math.random() > 0.5 ? 'x' + code.toString(16) : code;
    out += `&#${ref};`;
  }
  return out;
}
46
47/**
48 * Block Lexer
49 */
/**
 * Block Lexer
 *
 * Converts a markdown string into a token array. Block-level tokens are
 * produced first (`blockTokens`); inline lexing is deferred through
 * `inlineQueue` and run afterwards in `lex`, so link definitions gathered
 * during the block pass are available to inline reflinks.
 */
export class Lexer {
  constructor(options) {
    this.tokens = [];
    // Link definitions keyed by reference tag. A null prototype guards
    // against collisions with Object.prototype keys (e.g. "__proto__").
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;
    this.tokenizer.lexer = this;
    this.inlineQueue = [];
    this.state = {
      inLink: false,
      inRawBlock: false,
      top: true
    };

    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    // Rule selection: pedantic takes precedence over gfm; `breaks`
    // only changes the inline rules and only under gfm.
    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   * @param {string} src markdown source
   * @param {object} [options] marked options
   * @returns {array} token array (with a `links` map attached)
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method
   * @param {string} src markdown source (inline content only)
   * @param {object} [options] marked options
   * @returns {array} inline token array
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Preprocessing: normalize line endings, expand tabs, run the block
   * pass, then drain the inline queue populated during block lexing.
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n')
      .replace(/\t/g, '    ');

    this.blockTokens(src, this.tokens);

    let next;
    while (next = this.inlineQueue.shift()) {
      this.inlineTokens(next.src, next.tokens);
    }

    return this.tokens;
  }

  /**
   * Lexing: consume `src` from the front, one block token per loop
   * iteration. Each tokenizer method either returns a token whose `raw`
   * covers the consumed text, or a falsy value to let the next rule try.
   */
  blockTokens(src, tokens = []) {
    if (this.options.pedantic) {
      src = src.replace(/^ +$/gm, '');
    }
    let token, lastToken, cutSrc, lastParagraphClipped;

    while (src) {
      // Custom block-level extensions get first chance at the input.
      if (this.options.extensions
        && this.options.extensions.block
        && this.options.extensions.block.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.raw.length === 1 && tokens.length > 0) {
          // if there's a single \n as a spacer, it's terminating the last line,
          // so move it there so that we don't get unnecessary paragraph tags
          tokens[tokens.length - 1].raw += '\n';
        } else {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // An indented code block cannot interrupt a paragraph.
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          // The merged paragraph/text is already queued for inline lexing;
          // update the queued source rather than queueing twice.
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (token = this.tokenizer.def(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // A link definition cannot interrupt a paragraph; fold it into the
        // preceding paragraph/text token instead.
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.raw;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else if (!this.tokens.links[token.tag]) {
          // First definition of a tag wins.
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      // prevent paragraph consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        // FIX: arrow function so `this` is the Lexer. With a plain
        // `function` callback, `this` is undefined in module (strict) code
        // and extensions received `{ lexer: undefined }`.
        this.options.extensions.startBlock.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1];
        if (lastParagraphClipped && lastToken.type === 'paragraph') {
          // Continuation of a paragraph that was clipped at an extension
          // boundary last iteration: merge and re-queue a single inline job.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        lastParagraphClipped = (cutSrc.length !== src.length);
        src = src.substring(token.raw.length);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          // Merge adjacent text tokens and collapse their inline jobs.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // No rule consumed anything: bail rather than spin forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    this.state.top = true;
    return tokens;
  }

  /**
   * Queue a string for inline lexing; `tokens` will be filled in place
   * when the queue is drained in `lex`.
   */
  inline(src, tokens) {
    this.inlineQueue.push({ src, tokens });
  }

  /**
   * Lexing/Compiling
   */
  inlineTokens(src, tokens = []) {
    let token, lastToken, cutSrc;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    let keepPrevChar, prevChar;

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            // Replace the reflink with a same-length dummy so offsets into
            // maskedSrc still line up with src.
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    // Mask out escaped em & strong delimiters
    while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
    }

    while (src) {
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;

      // extensions
      if (this.options.extensions
        && this.options.extensions.inline
        && this.options.extensions.inline.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // em & strong
      if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm)
      if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      // prevent inlineText consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startInline) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        // FIX: arrow function so `this` is the Lexer (same issue as the
        // startBlock loop in blockTokens).
        this.options.extensions.startInline.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (token = this.tokenizer.inlineText(cutSrc, smartypants)) {
        src = src.substring(token.raw.length);
        if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
          prevChar = token.raw.slice(-1);
        }
        keepPrevChar = true;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // No rule consumed anything: bail rather than spin forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
}