1 | const Tokenizer = require('./Tokenizer.js');
|
2 | const { defaults } = require('./defaults.js');
|
3 | const { block, inline } = require('./rules.js');
|
4 |
|
5 |
|
6 |
|
7 |
|
/**
 * Replaces plain ASCII punctuation with typographic equivalents.
 *
 * The substitutions run strictly in the order listed below; later rules rely
 * on what earlier ones have (or have not) already rewritten, e.g. the
 * closing-quote rules only see quotes the opening-quote rules left behind.
 *
 * @param {string} text - Text to convert.
 * @returns {string} The text with dashes, quotes and ellipses replaced.
 */
function smartypants(text) {
  const substitutions = [
    // three hyphens -> em dash
    [/---/g, '\u2014'],
    // two hyphens -> en dash
    [/--/g, '\u2013'],
    // single quote at start of input or after an opening character -> left single quote
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // every remaining single quote -> right single quote (also apostrophes)
    [/'/g, '\u2019'],
    // double quote at start of input or after an opening character -> left double quote
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // every remaining double quote -> right double quote
    [/"/g, '\u201d'],
    // three dots -> ellipsis
    [/\.{3}/g, '\u2026']
  ];

  let converted = text;
  for (const [pattern, replacement] of substitutions) {
    converted = converted.replace(pattern, replacement);
  }
  return converted;
}
|
25 |
|
26 |
|
27 |
|
28 |
|
/**
 * Obfuscates text by encoding every character as an HTML numeric character
 * reference, choosing at random (per character) between the decimal form
 * (`&#97;`) and the hexadecimal form (`&#x61;`).
 *
 * Characters outside the Basic Multilingual Plane are encoded as a single
 * reference to their full code point. Encoding the two UTF-16 surrogate
 * halves separately would yield references that HTML parsers reject and
 * render as U+FFFD.
 *
 * The output is intentionally non-deterministic because of `Math.random()`.
 *
 * @param {string} text - Text to obfuscate.
 * @returns {string} Concatenated numeric character references.
 */
function mangle(text) {
  let out = '';
  const length = text.length;

  for (let i = 0; i < length; i++) {
    const codePoint = text.codePointAt(i);
    if (codePoint > 0xffff) {
      // A surrogate pair occupies two UTF-16 units; skip the trailing half.
      i++;
    }
    const encoded = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : codePoint;
    out += '&#' + encoded + ';';
  }

  return out;
}
|
45 |
|
46 |
|
47 |
|
48 |
|
49 | module.exports = class Lexer {
|
50 | constructor(options) {
|
51 | this.tokens = [];
|
52 | this.tokens.links = Object.create(null);
|
53 | this.options = options || defaults;
|
54 | this.options.tokenizer = this.options.tokenizer || new Tokenizer();
|
55 | this.tokenizer = this.options.tokenizer;
|
56 | this.tokenizer.options = this.options;
|
57 |
|
58 | const rules = {
|
59 | block: block.normal,
|
60 | inline: inline.normal
|
61 | };
|
62 |
|
63 | if (this.options.pedantic) {
|
64 | rules.block = block.pedantic;
|
65 | rules.inline = inline.pedantic;
|
66 | } else if (this.options.gfm) {
|
67 | rules.block = block.gfm;
|
68 | if (this.options.breaks) {
|
69 | rules.inline = inline.breaks;
|
70 | } else {
|
71 | rules.inline = inline.gfm;
|
72 | }
|
73 | }
|
74 | this.tokenizer.rules = rules;
|
75 | }
|
76 |
|
77 | |
78 |
|
79 |
|
80 | static get rules() {
|
81 | return {
|
82 | block,
|
83 | inline
|
84 | };
|
85 | }
|
86 |
|
87 | |
88 |
|
89 |
|
90 | static lex(src, options) {
|
91 | const lexer = new Lexer(options);
|
92 | return lexer.lex(src);
|
93 | }
|
94 |
|
95 | |
96 |
|
97 |
|
98 | lex(src) {
|
99 | src = src
|
100 | .replace(/\r\n|\r/g, '\n')
|
101 | .replace(/\t/g, ' ');
|
102 |
|
103 | this.blockTokens(src, this.tokens, true);
|
104 |
|
105 | this.inline(this.tokens);
|
106 |
|
107 | return this.tokens;
|
108 | }
|
109 |
|
110 | |
111 |
|
112 |
|
113 | blockTokens(src, tokens = [], top = true) {
|
114 | src = src.replace(/^ +$/gm, '');
|
115 | let token, i, l;
|
116 |
|
117 | while (src) {
|
118 |
|
119 | if (token = this.tokenizer.space(src)) {
|
120 | src = src.substring(token.raw.length);
|
121 | if (token.type) {
|
122 | tokens.push(token);
|
123 | }
|
124 | continue;
|
125 | }
|
126 |
|
127 |
|
128 | if (token = this.tokenizer.code(src, tokens)) {
|
129 | src = src.substring(token.raw.length);
|
130 | tokens.push(token);
|
131 | continue;
|
132 | }
|
133 |
|
134 |
|
135 | if (token = this.tokenizer.fences(src)) {
|
136 | src = src.substring(token.raw.length);
|
137 | tokens.push(token);
|
138 | continue;
|
139 | }
|
140 |
|
141 |
|
142 | if (token = this.tokenizer.heading(src)) {
|
143 | src = src.substring(token.raw.length);
|
144 | tokens.push(token);
|
145 | continue;
|
146 | }
|
147 |
|
148 |
|
149 | if (token = this.tokenizer.nptable(src)) {
|
150 | src = src.substring(token.raw.length);
|
151 | tokens.push(token);
|
152 | continue;
|
153 | }
|
154 |
|
155 |
|
156 | if (token = this.tokenizer.hr(src)) {
|
157 | src = src.substring(token.raw.length);
|
158 | tokens.push(token);
|
159 | continue;
|
160 | }
|
161 |
|
162 |
|
163 | if (token = this.tokenizer.blockquote(src)) {
|
164 | src = src.substring(token.raw.length);
|
165 | token.tokens = this.blockTokens(token.text, [], top);
|
166 | tokens.push(token);
|
167 | continue;
|
168 | }
|
169 |
|
170 |
|
171 | if (token = this.tokenizer.list(src)) {
|
172 | src = src.substring(token.raw.length);
|
173 | l = token.items.length;
|
174 | for (i = 0; i < l; i++) {
|
175 | token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
|
176 | }
|
177 | tokens.push(token);
|
178 | continue;
|
179 | }
|
180 |
|
181 |
|
182 | if (token = this.tokenizer.html(src)) {
|
183 | src = src.substring(token.raw.length);
|
184 | tokens.push(token);
|
185 | continue;
|
186 | }
|
187 |
|
188 |
|
189 | if (top && (token = this.tokenizer.def(src))) {
|
190 | src = src.substring(token.raw.length);
|
191 | if (!this.tokens.links[token.tag]) {
|
192 | this.tokens.links[token.tag] = {
|
193 | href: token.href,
|
194 | title: token.title
|
195 | };
|
196 | }
|
197 | continue;
|
198 | }
|
199 |
|
200 |
|
201 | if (token = this.tokenizer.table(src)) {
|
202 | src = src.substring(token.raw.length);
|
203 | tokens.push(token);
|
204 | continue;
|
205 | }
|
206 |
|
207 |
|
208 | if (token = this.tokenizer.lheading(src)) {
|
209 | src = src.substring(token.raw.length);
|
210 | tokens.push(token);
|
211 | continue;
|
212 | }
|
213 |
|
214 |
|
215 | if (top && (token = this.tokenizer.paragraph(src))) {
|
216 | src = src.substring(token.raw.length);
|
217 | tokens.push(token);
|
218 | continue;
|
219 | }
|
220 |
|
221 |
|
222 | if (token = this.tokenizer.text(src)) {
|
223 | src = src.substring(token.raw.length);
|
224 | tokens.push(token);
|
225 | continue;
|
226 | }
|
227 |
|
228 | if (src) {
|
229 | const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
|
230 | if (this.options.silent) {
|
231 | console.error(errMsg);
|
232 | break;
|
233 | } else {
|
234 | throw new Error(errMsg);
|
235 | }
|
236 | }
|
237 | }
|
238 |
|
239 | return tokens;
|
240 | }
|
241 |
|
242 | inline(tokens) {
|
243 | let i,
|
244 | j,
|
245 | k,
|
246 | l2,
|
247 | row,
|
248 | token;
|
249 |
|
250 | const l = tokens.length;
|
251 | for (i = 0; i < l; i++) {
|
252 | token = tokens[i];
|
253 | switch (token.type) {
|
254 | case 'paragraph':
|
255 | case 'text':
|
256 | case 'heading': {
|
257 | token.tokens = [];
|
258 | this.inlineTokens(token.text, token.tokens);
|
259 | break;
|
260 | }
|
261 | case 'table': {
|
262 | token.tokens = {
|
263 | header: [],
|
264 | cells: []
|
265 | };
|
266 |
|
267 |
|
268 | l2 = token.header.length;
|
269 | for (j = 0; j < l2; j++) {
|
270 | token.tokens.header[j] = [];
|
271 | this.inlineTokens(token.header[j], token.tokens.header[j]);
|
272 | }
|
273 |
|
274 |
|
275 | l2 = token.cells.length;
|
276 | for (j = 0; j < l2; j++) {
|
277 | row = token.cells[j];
|
278 | token.tokens.cells[j] = [];
|
279 | for (k = 0; k < row.length; k++) {
|
280 | token.tokens.cells[j][k] = [];
|
281 | this.inlineTokens(row[k], token.tokens.cells[j][k]);
|
282 | }
|
283 | }
|
284 |
|
285 | break;
|
286 | }
|
287 | case 'blockquote': {
|
288 | this.inline(token.tokens);
|
289 | break;
|
290 | }
|
291 | case 'list': {
|
292 | l2 = token.items.length;
|
293 | for (j = 0; j < l2; j++) {
|
294 | this.inline(token.items[j].tokens);
|
295 | }
|
296 | break;
|
297 | }
|
298 | default: {
|
299 |
|
300 | }
|
301 | }
|
302 | }
|
303 |
|
304 | return tokens;
|
305 | }
|
306 |
|
307 | |
308 |
|
309 |
|
310 | inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
|
311 | let token;
|
312 |
|
313 | while (src) {
|
314 |
|
315 | if (token = this.tokenizer.escape(src)) {
|
316 | src = src.substring(token.raw.length);
|
317 | tokens.push(token);
|
318 | continue;
|
319 | }
|
320 |
|
321 |
|
322 | if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
|
323 | src = src.substring(token.raw.length);
|
324 | inLink = token.inLink;
|
325 | inRawBlock = token.inRawBlock;
|
326 | tokens.push(token);
|
327 | continue;
|
328 | }
|
329 |
|
330 |
|
331 | if (token = this.tokenizer.link(src)) {
|
332 | src = src.substring(token.raw.length);
|
333 | if (token.type === 'link') {
|
334 | token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
|
335 | }
|
336 | tokens.push(token);
|
337 | continue;
|
338 | }
|
339 |
|
340 |
|
341 | if (token = this.tokenizer.reflink(src, this.tokens.links)) {
|
342 | src = src.substring(token.raw.length);
|
343 | if (token.type === 'link') {
|
344 | token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
|
345 | }
|
346 | tokens.push(token);
|
347 | continue;
|
348 | }
|
349 |
|
350 |
|
351 | if (token = this.tokenizer.strong(src)) {
|
352 | src = src.substring(token.raw.length);
|
353 | token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
354 | tokens.push(token);
|
355 | continue;
|
356 | }
|
357 |
|
358 |
|
359 | if (token = this.tokenizer.em(src)) {
|
360 | src = src.substring(token.raw.length);
|
361 | token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
362 | tokens.push(token);
|
363 | continue;
|
364 | }
|
365 |
|
366 |
|
367 | if (token = this.tokenizer.codespan(src)) {
|
368 | src = src.substring(token.raw.length);
|
369 | tokens.push(token);
|
370 | continue;
|
371 | }
|
372 |
|
373 |
|
374 | if (token = this.tokenizer.br(src)) {
|
375 | src = src.substring(token.raw.length);
|
376 | tokens.push(token);
|
377 | continue;
|
378 | }
|
379 |
|
380 |
|
381 | if (token = this.tokenizer.del(src)) {
|
382 | src = src.substring(token.raw.length);
|
383 | token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
384 | tokens.push(token);
|
385 | continue;
|
386 | }
|
387 |
|
388 |
|
389 | if (token = this.tokenizer.autolink(src, mangle)) {
|
390 | src = src.substring(token.raw.length);
|
391 | tokens.push(token);
|
392 | continue;
|
393 | }
|
394 |
|
395 |
|
396 | if (!inLink && (token = this.tokenizer.url(src, mangle))) {
|
397 | src = src.substring(token.raw.length);
|
398 | tokens.push(token);
|
399 | continue;
|
400 | }
|
401 |
|
402 |
|
403 | if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
|
404 | src = src.substring(token.raw.length);
|
405 | tokens.push(token);
|
406 | continue;
|
407 | }
|
408 |
|
409 | if (src) {
|
410 | const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
|
411 | if (this.options.silent) {
|
412 | console.error(errMsg);
|
413 | break;
|
414 | } else {
|
415 | throw new Error(errMsg);
|
416 | }
|
417 | }
|
418 | }
|
419 |
|
420 | return tokens;
|
421 | }
|
422 | };
|