UNPKG

9.88 kBJavaScriptView Raw
1const Tokenizer = require('./Tokenizer.js');
2const { defaults } = require('./defaults.js');
3const { block, inline } = require('./rules.js');
4
/**
 * Smartypants-style typographic replacement.
 *
 * Runs a fixed sequence of global substitutions over `text`. The sequence is
 * order-sensitive: `---` must be consumed before `--`, and each "opening"
 * quote rule must run before the matching catch-all "closing" rule, which
 * converts every quote character still left in the string.
 *
 * @param {string} text - Text to transform.
 * @returns {string} The text with dashes, quotes and ellipses replaced by
 *   their typographic Unicode counterparts.
 */
function smartypants(text) {
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles: a quote at the start or after a dash, bracket,
    // double quote or whitespace
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes: every single quote still remaining
    [/'/g, '\u2019'],
    // opening doubles: same contexts, plus directly after an opening single
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles: every double quote still remaining
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  let result = text;
  for (const [pattern, replacement] of substitutions) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
25
/**
 * Mangle text (used for email addresses) into HTML numeric character
 * references.
 *
 * Each character is emitted as either a decimal (`&#97;`) or a hexadecimal
 * (`&#x61;`) reference, chosen at random per character, so the output differs
 * between calls while decoding to the same text.
 *
 * The text is walked by Unicode code point rather than by UTF-16 code unit:
 * encoding the two halves of a surrogate pair as separate references would
 * produce references that HTML does not decode back to the original
 * character.
 *
 * @param {string} text - Text to obfuscate.
 * @returns {string} Concatenated numeric character references, one per code
 *   point of `text` (empty string for empty input).
 */
function mangle(text) {
  let out = '';

  // String iteration yields whole code points, keeping surrogate pairs intact.
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    // Randomly pick hexadecimal or decimal notation for this character.
    const encoded = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : String(codePoint);
    out += '&#' + encoded + ';';
  }

  return out;
}
45
46/**
47 * Block Lexer
48 */
49module.exports = class Lexer {
50 constructor(options) {
51 this.tokens = [];
52 this.tokens.links = Object.create(null);
53 this.options = options || defaults;
54 this.options.tokenizer = this.options.tokenizer || new Tokenizer();
55 this.tokenizer = this.options.tokenizer;
56 this.tokenizer.options = this.options;
57
58 const rules = {
59 block: block.normal,
60 inline: inline.normal
61 };
62
63 if (this.options.pedantic) {
64 rules.block = block.pedantic;
65 rules.inline = inline.pedantic;
66 } else if (this.options.gfm) {
67 rules.block = block.gfm;
68 if (this.options.breaks) {
69 rules.inline = inline.breaks;
70 } else {
71 rules.inline = inline.gfm;
72 }
73 }
74 this.tokenizer.rules = rules;
75 }
76
77 /**
78 * Expose Rules
79 */
80 static get rules() {
81 return {
82 block,
83 inline
84 };
85 }
86
87 /**
88 * Static Lex Method
89 */
90 static lex(src, options) {
91 const lexer = new Lexer(options);
92 return lexer.lex(src);
93 }
94
95 /**
96 * Preprocessing
97 */
98 lex(src) {
99 src = src
100 .replace(/\r\n|\r/g, '\n')
101 .replace(/\t/g, ' ');
102
103 this.blockTokens(src, this.tokens, true);
104
105 this.inline(this.tokens);
106
107 return this.tokens;
108 }
109
110 /**
111 * Lexing
112 */
113 blockTokens(src, tokens = [], top = true) {
114 src = src.replace(/^ +$/gm, '');
115 let token, i, l;
116
117 while (src) {
118 // newline
119 if (token = this.tokenizer.space(src)) {
120 src = src.substring(token.raw.length);
121 if (token.type) {
122 tokens.push(token);
123 }
124 continue;
125 }
126
127 // code
128 if (token = this.tokenizer.code(src, tokens)) {
129 src = src.substring(token.raw.length);
130 tokens.push(token);
131 continue;
132 }
133
134 // fences
135 if (token = this.tokenizer.fences(src)) {
136 src = src.substring(token.raw.length);
137 tokens.push(token);
138 continue;
139 }
140
141 // heading
142 if (token = this.tokenizer.heading(src)) {
143 src = src.substring(token.raw.length);
144 tokens.push(token);
145 continue;
146 }
147
148 // table no leading pipe (gfm)
149 if (token = this.tokenizer.nptable(src)) {
150 src = src.substring(token.raw.length);
151 tokens.push(token);
152 continue;
153 }
154
155 // hr
156 if (token = this.tokenizer.hr(src)) {
157 src = src.substring(token.raw.length);
158 tokens.push(token);
159 continue;
160 }
161
162 // blockquote
163 if (token = this.tokenizer.blockquote(src)) {
164 src = src.substring(token.raw.length);
165 token.tokens = this.blockTokens(token.text, [], top);
166 tokens.push(token);
167 continue;
168 }
169
170 // list
171 if (token = this.tokenizer.list(src)) {
172 src = src.substring(token.raw.length);
173 l = token.items.length;
174 for (i = 0; i < l; i++) {
175 token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
176 }
177 tokens.push(token);
178 continue;
179 }
180
181 // html
182 if (token = this.tokenizer.html(src)) {
183 src = src.substring(token.raw.length);
184 tokens.push(token);
185 continue;
186 }
187
188 // def
189 if (top && (token = this.tokenizer.def(src))) {
190 src = src.substring(token.raw.length);
191 if (!this.tokens.links[token.tag]) {
192 this.tokens.links[token.tag] = {
193 href: token.href,
194 title: token.title
195 };
196 }
197 continue;
198 }
199
200 // table (gfm)
201 if (token = this.tokenizer.table(src)) {
202 src = src.substring(token.raw.length);
203 tokens.push(token);
204 continue;
205 }
206
207 // lheading
208 if (token = this.tokenizer.lheading(src)) {
209 src = src.substring(token.raw.length);
210 tokens.push(token);
211 continue;
212 }
213
214 // top-level paragraph
215 if (top && (token = this.tokenizer.paragraph(src))) {
216 src = src.substring(token.raw.length);
217 tokens.push(token);
218 continue;
219 }
220
221 // text
222 if (token = this.tokenizer.text(src)) {
223 src = src.substring(token.raw.length);
224 tokens.push(token);
225 continue;
226 }
227
228 if (src) {
229 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
230 if (this.options.silent) {
231 console.error(errMsg);
232 break;
233 } else {
234 throw new Error(errMsg);
235 }
236 }
237 }
238
239 return tokens;
240 }
241
242 inline(tokens) {
243 let i,
244 j,
245 k,
246 l2,
247 row,
248 token;
249
250 const l = tokens.length;
251 for (i = 0; i < l; i++) {
252 token = tokens[i];
253 switch (token.type) {
254 case 'paragraph':
255 case 'text':
256 case 'heading': {
257 token.tokens = [];
258 this.inlineTokens(token.text, token.tokens);
259 break;
260 }
261 case 'table': {
262 token.tokens = {
263 header: [],
264 cells: []
265 };
266
267 // header
268 l2 = token.header.length;
269 for (j = 0; j < l2; j++) {
270 token.tokens.header[j] = [];
271 this.inlineTokens(token.header[j], token.tokens.header[j]);
272 }
273
274 // cells
275 l2 = token.cells.length;
276 for (j = 0; j < l2; j++) {
277 row = token.cells[j];
278 token.tokens.cells[j] = [];
279 for (k = 0; k < row.length; k++) {
280 token.tokens.cells[j][k] = [];
281 this.inlineTokens(row[k], token.tokens.cells[j][k]);
282 }
283 }
284
285 break;
286 }
287 case 'blockquote': {
288 this.inline(token.tokens);
289 break;
290 }
291 case 'list': {
292 l2 = token.items.length;
293 for (j = 0; j < l2; j++) {
294 this.inline(token.items[j].tokens);
295 }
296 break;
297 }
298 default: {
299 // do nothing
300 }
301 }
302 }
303
304 return tokens;
305 }
306
307 /**
308 * Lexing/Compiling
309 */
310 inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
311 let token;
312
313 while (src) {
314 // escape
315 if (token = this.tokenizer.escape(src)) {
316 src = src.substring(token.raw.length);
317 tokens.push(token);
318 continue;
319 }
320
321 // tag
322 if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
323 src = src.substring(token.raw.length);
324 inLink = token.inLink;
325 inRawBlock = token.inRawBlock;
326 tokens.push(token);
327 continue;
328 }
329
330 // link
331 if (token = this.tokenizer.link(src)) {
332 src = src.substring(token.raw.length);
333 if (token.type === 'link') {
334 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
335 }
336 tokens.push(token);
337 continue;
338 }
339
340 // reflink, nolink
341 if (token = this.tokenizer.reflink(src, this.tokens.links)) {
342 src = src.substring(token.raw.length);
343 if (token.type === 'link') {
344 token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
345 }
346 tokens.push(token);
347 continue;
348 }
349
350 // strong
351 if (token = this.tokenizer.strong(src)) {
352 src = src.substring(token.raw.length);
353 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
354 tokens.push(token);
355 continue;
356 }
357
358 // em
359 if (token = this.tokenizer.em(src)) {
360 src = src.substring(token.raw.length);
361 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
362 tokens.push(token);
363 continue;
364 }
365
366 // code
367 if (token = this.tokenizer.codespan(src)) {
368 src = src.substring(token.raw.length);
369 tokens.push(token);
370 continue;
371 }
372
373 // br
374 if (token = this.tokenizer.br(src)) {
375 src = src.substring(token.raw.length);
376 tokens.push(token);
377 continue;
378 }
379
380 // del (gfm)
381 if (token = this.tokenizer.del(src)) {
382 src = src.substring(token.raw.length);
383 token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
384 tokens.push(token);
385 continue;
386 }
387
388 // autolink
389 if (token = this.tokenizer.autolink(src, mangle)) {
390 src = src.substring(token.raw.length);
391 tokens.push(token);
392 continue;
393 }
394
395 // url (gfm)
396 if (!inLink && (token = this.tokenizer.url(src, mangle))) {
397 src = src.substring(token.raw.length);
398 tokens.push(token);
399 continue;
400 }
401
402 // text
403 if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
404 src = src.substring(token.raw.length);
405 tokens.push(token);
406 continue;
407 }
408
409 if (src) {
410 const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
411 if (this.options.silent) {
412 console.error(errMsg);
413 break;
414 } else {
415 throw new Error(errMsg);
416 }
417 }
418 }
419
420 return tokens;
421 }
422};