1 | import { Tokenizer } from './Tokenizer.js';
|
2 | import { defaults } from './defaults.js';
|
3 | import { block, inline } from './rules.js';
|
4 | import { repeatString } from './helpers.js';
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
/**
 * Replace plain ASCII punctuation with its typographic counterpart.
 *
 * @param {string} text - Text to transform.
 * @returns {string} The text with em/en dashes, curly single and double
 *   quotes and ellipses substituted.
 */
function smartypants(text) {
  // The order is significant: '---' must be consumed before '--', and each
  // "opening quote" rule must run before the catch-all "closing quote" rule
  // that follows it.
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles (start of text, or after a dash, bracket, quote or space)
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  let result = text;
  for (const [pattern, replacement] of substitutions) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
/**
 * Obfuscate text by encoding every UTF-16 code unit as an HTML numeric
 * character reference, choosing at random between the decimal form
 * (`&#64;`) and the hexadecimal form (`&#x40;`) for each unit.
 *
 * @param {string} text - Text to encode.
 * @returns {string} The concatenated character references; differs between
 *   calls because of the random decimal/hex choice.
 */
function mangle(text) {
  const entities = [];

  for (let idx = 0; idx < text.length; idx += 1) {
    const code = text.charCodeAt(idx);
    // Exactly one random draw per code unit decides the notation.
    const useHex = Math.random() > 0.5;
    entities.push(useHex ? `&#x${code.toString(16)};` : `&#${code};`);
  }

  return entities.join('');
}
|
48 |
|
49 |
|
50 |
|
51 |
|
/**
 * Lexer: turns a source string into a list of tokens.
 *
 * Recognition of each construct is delegated to `this.tokenizer`; this class
 * only decides the order in which tokenizers are tried, how adjacent tokens
 * are merged, and when inline content is processed.
 */
export class Lexer {
  /**
   * @param {object} [options] - Lexer options; the imported `defaults` object
   *   is used when omitted. Note that the options object is mutated: its
   *   `tokenizer` property is filled in when missing.
   */
  constructor(options) {
    this.tokens = [];
    // Link definitions are stored on the token array itself, in a
    // prototype-less object so a tag can never collide with an inherited key.
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;
    this.tokenizer.lexer = this;
    // Pending { src, tokens } pairs whose inline content is lexed by lex().
    this.inlineQueue = [];
    this.state = {
      inLink: false,
      inRawBlock: false,
      top: true
    };

    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    // `pedantic` takes precedence over `gfm`; `breaks` is only consulted
    // together with `gfm`.
    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose the imported block and inline rule sets.
   *
   * @returns {{block: object, inline: object}}
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Convenience wrapper: build a Lexer and run `lex` on `src`.
   *
   * @param {string} src - Source text.
   * @param {object} [options] - Passed to the constructor.
   * @returns {Array} The token list produced by `lex`.
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Convenience wrapper: build a Lexer and run `inlineTokens` on `src`.
   *
   * @param {string} src - Source text.
   * @param {object} [options] - Passed to the constructor.
   * @returns {Array} The inline token list.
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Normalise line endings, lex block-level tokens, then drain the inline
   * queue that accumulated while doing so.
   *
   * @param {string} src - Source text.
   * @returns {Array} `this.tokens` (which also carries a `links` property).
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n');

    this.blockTokens(src, this.tokens);

    // Inline lexing is deferred until all blocks have been seen, so that
    // every link definition is already registered in this.tokens.links.
    let next;
    while (next = this.inlineQueue.shift()) {
      this.inlineTokens(next.src, next.tokens);
    }

    return this.tokens;
  }

  /**
   * Lex block-level tokens from `src`, appending them to `tokens`.
   *
   * @param {string} src - Source text (line endings already normalised).
   * @param {Array} [tokens=[]] - Array that receives the tokens.
   * @returns {Array} The same `tokens` array.
   * @throws {Error} When no tokenizer consumes the remaining input and
   *   `options.silent` is falsy; with `silent` the message is logged via
   *   `console.error` and lexing stops.
   */
  blockTokens(src, tokens = []) {
    // A tab counts as four columns of indentation.
    if (this.options.pedantic) {
      src = src.replace(/\t/g, '    ').replace(/^ +$/gm, '');
    } else {
      // Only tabs in the leading whitespace of a line are expanded.
      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
        return leading + '    '.repeat(tabs.length);
      });
    }

    let token, lastToken, cutSrc, lastParagraphClipped;

    while (src) {
      // Extension block tokenizers get the first chance at the input.
      if (this.options.extensions
        && this.options.extensions.block
        && this.options.extensions.block.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.raw.length === 1 && tokens.length > 0) {
          // A single newline directly after a token is folded into that
          // token's raw text instead of becoming a token of its own.
          tokens[tokens.length - 1].raw += '\n';
        } else {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // A code match right after a paragraph/text token continues that
        // token rather than starting a code token.
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (token = this.tokenizer.def(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          // Directly after a paragraph/text token the definition's raw text
          // is appended to that token and no link is registered.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.raw;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else if (!this.tokens.links[token.tag]) {
          // The first definition of a tag wins.
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      // Clip the input handed to the paragraph tokenizer at the earliest
      // position where an extension reports that one of its tokens may start.
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        // Arrow callback: `this` must be the lexer so each hook receives
        // `{ lexer }`, just like the extension tokenizers above.
        this.options.extensions.startBlock.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          // +1 because the hooks were given src without its first character.
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1];
        if (lastParagraphClipped && lastToken.type === 'paragraph') {
          // The previous paragraph was cut short for an extension that then
          // did not match: merge this paragraph back into it.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        lastParagraphClipped = (cutSrc.length !== src.length);
        src = src.substring(token.raw.length);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          // Consecutive text tokens are merged into one.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // Nothing consumed any input: bail out instead of spinning forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    this.state.top = true;
    return tokens;
  }

  /**
   * Queue `src` for inline lexing; the queue is drained by `lex`.
   *
   * @param {string} src - Inline source text.
   * @param {Array} [tokens=[]] - Array that will later receive the tokens.
   * @returns {Array} The same `tokens` array (still unfilled at this point).
   */
  inline(src, tokens = []) {
    this.inlineQueue.push({ src, tokens });
    return tokens;
  }

  /**
   * Lex inline-level tokens from `src`, appending them to `tokens`.
   *
   * @param {string} src - Inline source text.
   * @param {Array} [tokens=[]] - Array that receives the tokens.
   * @returns {Array} The same `tokens` array.
   * @throws {Error} When no tokenizer consumes the remaining input and
   *   `options.silent` is falsy; with `silent` the message is logged via
   *   `console.error` and lexing stops.
   */
  inlineTokens(src, tokens = []) {
    let token, lastToken, cutSrc;

    // maskedSrc is a copy of src in which certain spans are overwritten with
    // filler; it is only handed to the emStrong tokenizer.
    let maskedSrc = src;
    let match;
    let keepPrevChar, prevChar;

    // Mask out reference links whose label is a known definition.
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            // Presumably repeatString('a', n) yields n copies of 'a', keeping
            // the masked span the same length as the match.
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }

    // Mask out spans matched by the blockSkip rule.
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    // Mask out spans matched by the escapedEmSt rule.
    while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
    }

    while (src) {
      // prevChar survives only into the iteration that immediately follows
      // an inlineText match.
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;

      // Extension inline tokenizers get the first chance at the input.
      if (this.options.extensions
        && this.options.extensions.inline
        && this.options.extensions.inline.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // Adjacent text tokens are merged into one.
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // em & strong
      if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (not attempted while inside a link)
      if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      // Clip the input handed to the text tokenizer at the earliest position
      // where an extension reports that one of its tokens may start.
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startInline) {
        let startIndex = Infinity;
        const tempSrc = src.slice(1);
        let tempStart;
        // Arrow callback: `this` must be the lexer so each hook receives
        // `{ lexer }`, just like the extension tokenizers above.
        this.options.extensions.startInline.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          // +1 because the hooks were given src without its first character.
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (token = this.tokenizer.inlineText(cutSrc, smartypants)) {
        src = src.substring(token.raw.length);
        // Remember the last character for the next emStrong attempt, unless
        // it is an underscore.
        if (token.raw.slice(-1) !== '_') {
          prevChar = token.raw.slice(-1);
        }
        keepPrevChar = true;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // Nothing consumed any input: bail out instead of spinning forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
}
|