import { Tokenizer } from './Tokenizer.js';
import { defaults } from './defaults.js';
import { block, inline } from './rules.js';
import { repeatString } from './helpers.js';

/**
 * smartypants text replacement
 * @param {string} text
 */
function smartypants(text) {
  return text
    // em-dashes
    .replace(/---/g, '\u2014')
    // en-dashes
    .replace(/--/g, '\u2013')
    // opening singles
    .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018')
    // closing singles & apostrophes
    .replace(/'/g, '\u2019')
    // opening doubles
    .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c')
    // closing doubles
    .replace(/"/g, '\u201d')
    // ellipses
    .replace(/\.{3}/g, '\u2026');
}
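
// Illustrative example (input string is assumed, not from the source):
// passing '"Hello" -- world...' through smartypants yields
// '\u201cHello\u201d \u2013 world\u2026', i.e. "Hello" – world…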

/**
 * mangle email addresses
 * @param {string} text
 */
function mangle(text) {
  let out = '',
    i,
    ch;

  const l = text.length;
  for (i = 0; i < l; i++) {
    ch = text.charCodeAt(i);
    if (Math.random() > 0.5) {
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
}
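
// Illustrative: mangle('a@b') encodes each character as a numeric HTML
// entity, randomly in decimal or hex, e.g. '&#97;&#64;&#98;' or
// '&#x61;&#64;&#x62;'. Output varies per call because of Math.random().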

/**
 * Block Lexer
 */
export class Lexer {
  constructor(options) {
    this.tokens = [];
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;
    this.tokenizer.lexer = this;
    this.inlineQueue = [];
    this.state = {
      inLink: false,
      inRawBlock: false,
      top: true
    };

    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
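
    // Note: pedantic takes precedence over gfm above, and `breaks` only
    // swaps the inline rule set, since hard line breaks are an
    // inline-level concern.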
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }
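
  // Usage sketch (input is assumed; output shape follows from the code
  // below):
  //   const tokens = Lexer.lex('# Hello *world*');
  //   // tokens[0].type === 'heading'; tokens.links holds any link defs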

  /**
   * Static Lex Inline Method
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }
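
  // Usage sketch: Lexer.lexInline('*emphasis*') runs only the inline
  // pass, so block constructs such as headings are not recognized.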

  /**
   * Preprocessing
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n');

    this.blockTokens(src, this.tokens);

    let next;
    while (next = this.inlineQueue.shift()) {
      this.inlineTokens(next.src, next.tokens);
    }

    return this.tokens;
  }
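
  // Two-phase design: the block pass above also collects link
  // definitions into this.tokens.links, so the deferred inline pass can
  // resolve reference links defined anywhere in the document.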

  /**
   * Lexing
   */
  blockTokens(src, tokens = []) {
    if (this.options.pedantic) {
      src = src.replace(/\t/g, '    ').replace(/^ +$/gm, '');
    } else {
      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
        return leading + '    '.repeat(tabs.length);
      });
    }
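
    // e.g. '\t\tfoo' becomes '        foo' (each leading tab expands to
    // four spaces); tabs after the first non-tab character are left
    // untouched by this non-pedantic pass.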

    let token, lastToken, cutSrc, lastParagraphClipped;

    while (src) {
      if (this.options.extensions
        && this.options.extensions.block
        && this.options.extensions.block.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.raw.length === 1 && tokens.length > 0) {
          // if there's a single \n as a spacer, it's terminating the last line,
          // so move it there so that we don't get unnecessary paragraph tags
          tokens[tokens.length - 1].raw += '\n';
        } else {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // An indented code block cannot interrupt a paragraph.
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (token = this.tokenizer.def(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.raw;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      // prevent paragraph consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        this.options.extensions.startBlock.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
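
      // The slice(1) / startIndex + 1 offset guarantees forward progress:
      // even if an extension matches at index 0, at least one character is
      // still offered to the paragraph tokenizer below.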
      if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1];
        if (lastParagraphClipped && lastToken.type === 'paragraph') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        lastParagraphClipped = (cutSrc.length !== src.length);
        src = src.substring(token.raw.length);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    this.state.top = true;
    return tokens;
  }

  inline(src, tokens = []) {
    this.inlineQueue.push({ src, tokens });
    return tokens;
  }
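
  // Note: inline() only queues work; the queued sources are tokenized
  // when lex() drains inlineQueue after the block pass completes.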

  /**
   * Lexing/Compiling
   */
  inlineTokens(src, tokens = []) {
    let token, lastToken, cutSrc;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    let keepPrevChar, prevChar;

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    // Mask out escaped em & strong delimiters
    while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
    }
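
    // Masking sketch (example input assumed): a matched reflink such as
    // '[a *link*][ref]' is replaced in maskedSrc by a same-length
    // '[aaa...]' placeholder, so delimiters inside it cannot pair with
    // em/strong delimiters outside; src itself stays untouched for the
    // tokenizers below.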

    while (src) {
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;

      // extensions
      if (this.options.extensions
        && this.options.extensions.inline
        && this.options.extensions.inline.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // em & strong
      if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm)
      if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      // prevent inlineText consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startInline) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        this.options.extensions.startInline.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (token = this.tokenizer.inlineText(cutSrc, smartypants)) {
        src = src.substring(token.raw.length);
        if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
          prevChar = token.raw.slice(-1);
        }
        keepPrevChar = true;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
}