UNPKG

10.3 kBJavaScriptView Raw
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4
/**
 * smartypants text replacement
 *
 * Swaps plain ASCII punctuation for its typographic counterpart: dashes,
 * curly single/double quotes and the ellipsis character.
 *
 * @param {string} text Plain text to transform.
 * @returns {string} The text with typographic punctuation substituted.
 */
function smartypants(text) {
  // Order matters: `---` must be consumed before `--`, and every "opening"
  // quote rule must run before the catch-all "closing" rule for that quote.
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  return substitutions.reduce(
    (result, [pattern, glyph]) => result.replace(pattern, glyph),
    text
  );
}
25
/**
 * mangle email addresses
 *
 * Rewrites every character of `text` as an HTML numeric character reference,
 * choosing the decimal (`&#64;`) or hexadecimal (`&#x40;`) form at random for
 * each character. The output therefore varies between calls but always
 * decodes back to the same text.
 *
 * @param {string} text Text to obfuscate (e.g. an email address).
 * @returns {string} The text expressed as numeric character references.
 */
function mangle(text) {
  let out = '';

  // Walk the string by code point rather than by UTF-16 code unit. A
  // character outside the Basic Multilingual Plane would otherwise be emitted
  // as two surrogate references, which are not valid character references.
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    const digits = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : String(codePoint);
    out += '&#' + digits + ';';
  }

  return out;
}
45
46/**
47 * Block Lexer
48 */
49module.exports = class Lexer {
50 constructor(options) {
51 this.tokens = [];
52 this.tokens.links = Object.create(null);
53 this.options = options || defaults;
54 this.options.tokenizer = this.options.tokenizer || new Tokenizer();
55 this.tokenizer = this.options.tokenizer;
56 this.tokenizer.options = this.options;
57
58 const rules = {
59 block: block.normal,
60 inline: inline.normal
61 };
62
63 if (this.options.pedantic) {
64 rules.block = block.pedantic;
65 rules.inline = inline.pedantic;
66 } else if (this.options.gfm) {
67 rules.block = block.gfm;
68 if (this.options.breaks) {
69 rules.inline = inline.breaks;
70 } else {
71 rules.inline = inline.gfm;
72 }
73 }
74 this.tokenizer.rules = rules;
75 }
76
77 /**
78 * Expose Rules
79 */
80 static get rules() {
81 return {
82 block,
83 inline
84 };
85 }
86
87 /**
88 * Static Lex Method
89 */
90 static lex(src, options) {
91 const lexer = new Lexer(options);
92 return lexer.lex(src);
93 }
94
95 /**
96 * Preprocessing
97 */
98 lex(src) {
99 src = src
100 .replace(/\r\n|\r/g, '\n')
101 .replace(/\t/g, ' ');
102
103 this.blockTokens(src, this.tokens, true);
104
105 this.inline(this.tokens);
106
107 return this.tokens;
108 }
109
110 /**
111 * Lexing
112 */
113 blockTokens(src, tokens = [], top = true) {
114 src = src.replace(/^ +$/gm, '');
115 let token, i, l, lastToken;
116
117 while (src) {
118 // newline
119 if (token = this.tokenizer.space(src)) {
120 src = src.substring(token.raw.length);
121 if (token.type) {
122 tokens.push(token);
123 }
124 continue;
125 }
126
127 // code
128 if (token = this.tokenizer.code(src, tokens)) {
129 src = src.substring(token.raw.length);
130 if (token.type) {
131 tokens.push(token);
132 } else {
133 lastToken = tokens[tokens.length - 1];
134 lastToken.raw += '\n' + token.raw;
135 lastToken.text += '\n' + token.text;
136 }
137 continue;
138 }
139
140 // fences
141 if (token = this.tokenizer.fences(src)) {
142 src = src.substring(token.raw.length);
143 tokens.push(token);
144 continue;
145 }
146
147 // heading
148 if (token = this.tokenizer.heading(src)) {
149 src = src.substring(token.raw.length);
150 tokens.push(token);
151 continue;
152 }
153
154 // table no leading pipe (gfm)
155 if (token = this.tokenizer.nptable(src)) {
156 src = src.substring(token.raw.length);
157 tokens.push(token);
158 continue;
159 }
160
161 // hr
162 if (token = this.tokenizer.hr(src)) {
163 src = src.substring(token.raw.length);
164 tokens.push(token);
165 continue;
166 }
167
168 // blockquote
169 if (token = this.tokenizer.blockquote(src)) {
170 src = src.substring(token.raw.length);
171 token.tokens = this.blockTokens(token.text, [], top);
172 tokens.push(token);
173 continue;
174 }
175
176 // list
177 if (token = this.tokenizer.list(src)) {
178 src = src.substring(token.raw.length);
179 l = token.items.length;
180 for (i = 0; i < l; i++) {
181 token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
182 }
183 tokens.push(token);
184 continue;
185 }
186
187 // html
188 if (token = this.tokenizer.html(src)) {
189 src = src.substring(token.raw.length);
190 tokens.push(token);
191 continue;
192 }
193
194 // def
195 if (top && (token = this.tokenizer.def(src))) {
196 src = src.substring(token.raw.length);
197 if (!this.tokens.links[token.tag]) {
198 this.tokens.links[token.tag] = {
199 href: token.href,
200 title: token.title
201 };
202 }
203 continue;
204 }
205
206 // table (gfm)
207 if (token = this.tokenizer.table(src)) {
208 src = src.substring(token.raw.length);
209 tokens.push(token);
210 continue;
211 }
212
213 // lheading
214 if (token = this.tokenizer.lheading(src)) {
215 src = src.substring(token.raw.length);
216 tokens.push(token);
217 continue;
218 }
219
220 // top-level paragraph
221 if (top && (token = this.tokenizer.paragraph(src))) {
222 src = src.substring(token.raw.length);
223 tokens.push(token);
224 continue;
225 }
226
227 // text
228 if (token = this.tokenizer.text(src, tokens)) {
229 src = src.substring(token.raw.length);
230 if (token.type) {
231 tokens.push(token);
232 } else {
233 lastToken = tokens[tokens.length - 1];
234 lastToken.raw += '\n' + token.raw;
235 lastToken.text += '\n' + token.text;
236 }
237 continue;
238 }
239
240 if (src) {
241 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
242 if (this.options.silent) {
243 console.error(errMsg);
244 break;
245 } else {
246 throw new Error(errMsg);
247 }
248 }
249 }
250
251 return tokens;
252 }
253
254 inline(tokens) {
255 let i,
256 j,
257 k,
258 l2,
259 row,
260 token;
261
262 const l = tokens.length;
263 for (i = 0; i < l; i++) {
264 token = tokens[i];
265 switch (token.type) {
266 case 'paragraph':
267 case 'text':
268 case 'heading': {
269 token.tokens = [];
270 this.inlineTokens(token.text, token.tokens);
271 break;
272 }
273 case 'table': {
274 token.tokens = {
275 header: [],
276 cells: []
277 };
278
279 // header
280 l2 = token.header.length;
281 for (j = 0; j < l2; j++) {
282 token.tokens.header[j] = [];
283 this.inlineTokens(token.header[j], token.tokens.header[j]);
284 }
285
286 // cells
287 l2 = token.cells.length;
288 for (j = 0; j < l2; j++) {
289 row = token.cells[j];
290 token.tokens.cells[j] = [];
291 for (k = 0; k < row.length; k++) {
292 token.tokens.cells[j][k] = [];
293 this.inlineTokens(row[k], token.tokens.cells[j][k]);
294 }
295 }
296
297 break;
298 }
299 case 'blockquote': {
300 this.inline(token.tokens);
301 break;
302 }
303 case 'list': {
304 l2 = token.items.length;
305 for (j = 0; j < l2; j++) {
306 this.inline(token.items[j].tokens);
307 }
308 break;
309 }
310 default: {
311 // do nothing
312 }
313 }
314 }
315
316 return tokens;
317 }
318
319 /**
320 * Lexing/Compiling
321 */
322 inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
323 let token;
324
325 while (src) {
326 // escape
327 if (token = this.tokenizer.escape(src)) {
328 src = src.substring(token.raw.length);
329 tokens.push(token);
330 continue;
331 }
332
333 // tag
334 if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
335 src = src.substring(token.raw.length);
336 inLink = token.inLink;
337 inRawBlock = token.inRawBlock;
338 tokens.push(token);
339 continue;
340 }
341
342 // link
343 if (token = this.tokenizer.link(src)) {
344 src = src.substring(token.raw.length);
345 if (token.type === 'link') {
346 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
347 }
348 tokens.push(token);
349 continue;
350 }
351
352 // reflink, nolink
353 if (token = this.tokenizer.reflink(src, this.tokens.links)) {
354 src = src.substring(token.raw.length);
355 if (token.type === 'link') {
356 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
357 }
358 tokens.push(token);
359 continue;
360 }
361
362 // strong
363 if (token = this.tokenizer.strong(src)) {
364 src = src.substring(token.raw.length);
365 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
366 tokens.push(token);
367 continue;
368 }
369
370 // em
371 if (token = this.tokenizer.em(src)) {
372 src = src.substring(token.raw.length);
373 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
374 tokens.push(token);
375 continue;
376 }
377
378 // code
379 if (token = this.tokenizer.codespan(src)) {
380 src = src.substring(token.raw.length);
381 tokens.push(token);
382 continue;
383 }
384
385 // br
386 if (token = this.tokenizer.br(src)) {
387 src = src.substring(token.raw.length);
388 tokens.push(token);
389 continue;
390 }
391
392 // del (gfm)
393 if (token = this.tokenizer.del(src)) {
394 src = src.substring(token.raw.length);
395 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
396 tokens.push(token);
397 continue;
398 }
399
400 // autolink
401 if (token = this.tokenizer.autolink(src, mangle)) {
402 src = src.substring(token.raw.length);
403 tokens.push(token);
404 continue;
405 }
406
407 // url (gfm)
408 if (!inLink && (token = this.tokenizer.url(src, mangle))) {
409 src = src.substring(token.raw.length);
410 tokens.push(token);
411 continue;
412 }
413
414 // text
415 if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
416 src = src.substring(token.raw.length);
417 tokens.push(token);
418 continue;
419 }
420
421 if (src) {
422 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
423 if (this.options.silent) {
424 console.error(errMsg);
425 break;
426 } else {
427 throw new Error(errMsg);
428 }
429 }
430 }
431
432 return tokens;
433 }
434};