1 | const { defaults } = require('./defaults.js');
|
2 | const { block } = require('./rules.js');
|
3 | const {
|
4 | rtrim,
|
5 | splitCells,
|
6 | escape
|
7 | } = require('./helpers.js');
|
8 |
|
9 |
|
10 |
|
11 |
|
12 | module.exports = class Lexer {
|
13 | constructor(options) {
|
14 | this.tokens = [];
|
15 | this.tokens.links = Object.create(null);
|
16 | this.options = options || defaults;
|
17 | this.rules = block.normal;
|
18 |
|
19 | if (this.options.pedantic) {
|
20 | this.rules = block.pedantic;
|
21 | } else if (this.options.gfm) {
|
22 | this.rules = block.gfm;
|
23 | }
|
24 | }
|
25 |
|
26 | |
27 |
|
28 |
|
29 | static get rules() {
|
30 | return block;
|
31 | }
|
32 |
|
33 | |
34 |
|
35 |
|
36 | static lex(src, options) {
|
37 | const lexer = new Lexer(options);
|
38 | return lexer.lex(src);
|
39 | };
|
40 |
|
41 | |
42 |
|
43 |
|
44 | lex(src) {
|
45 | src = src
|
46 | .replace(/\r\n|\r/g, '\n')
|
47 | .replace(/\t/g, ' ');
|
48 |
|
49 | return this.token(src, true);
|
50 | };
|
51 |
|
52 | |
53 |
|
54 |
|
55 | token(src, top) {
|
56 | src = src.replace(/^ +$/gm, '');
|
57 | let next,
|
58 | loose,
|
59 | cap,
|
60 | bull,
|
61 | b,
|
62 | item,
|
63 | listStart,
|
64 | listItems,
|
65 | t,
|
66 | space,
|
67 | i,
|
68 | tag,
|
69 | l,
|
70 | isordered,
|
71 | istask,
|
72 | ischecked;
|
73 |
|
74 | while (src) {
|
75 |
|
76 | if (cap = this.rules.newline.exec(src)) {
|
77 | src = src.substring(cap[0].length);
|
78 | if (cap[0].length > 1) {
|
79 | this.tokens.push({
|
80 | type: 'space'
|
81 | });
|
82 | }
|
83 | }
|
84 |
|
85 |
|
86 | if (cap = this.rules.code.exec(src)) {
|
87 | const lastToken = this.tokens[this.tokens.length - 1];
|
88 | src = src.substring(cap[0].length);
|
89 |
|
90 | if (lastToken && lastToken.type === 'paragraph') {
|
91 | lastToken.text += '\n' + cap[0].trimRight();
|
92 | } else {
|
93 | cap = cap[0].replace(/^ {4}/gm, '');
|
94 | this.tokens.push({
|
95 | type: 'code',
|
96 | codeBlockStyle: 'indented',
|
97 | text: !this.options.pedantic
|
98 | ? rtrim(cap, '\n')
|
99 | : cap
|
100 | });
|
101 | }
|
102 | continue;
|
103 | }
|
104 |
|
105 |
|
106 | if (cap = this.rules.fences.exec(src)) {
|
107 | src = src.substring(cap[0].length);
|
108 | this.tokens.push({
|
109 | type: 'code',
|
110 | lang: cap[2] ? cap[2].trim() : cap[2],
|
111 | text: cap[3] || ''
|
112 | });
|
113 | continue;
|
114 | }
|
115 |
|
116 |
|
117 | if (cap = this.rules.heading.exec(src)) {
|
118 | src = src.substring(cap[0].length);
|
119 | this.tokens.push({
|
120 | type: 'heading',
|
121 | depth: cap[1].length,
|
122 | text: cap[2]
|
123 | });
|
124 | continue;
|
125 | }
|
126 |
|
127 |
|
128 | if (cap = this.rules.nptable.exec(src)) {
|
129 | item = {
|
130 | type: 'table',
|
131 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
132 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
133 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
134 | };
|
135 |
|
136 | if (item.header.length === item.align.length) {
|
137 | src = src.substring(cap[0].length);
|
138 |
|
139 | for (i = 0; i < item.align.length; i++) {
|
140 | if (/^ *-+: *$/.test(item.align[i])) {
|
141 | item.align[i] = 'right';
|
142 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
143 | item.align[i] = 'center';
|
144 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
145 | item.align[i] = 'left';
|
146 | } else {
|
147 | item.align[i] = null;
|
148 | }
|
149 | }
|
150 |
|
151 | for (i = 0; i < item.cells.length; i++) {
|
152 | item.cells[i] = splitCells(item.cells[i], item.header.length);
|
153 | }
|
154 |
|
155 | this.tokens.push(item);
|
156 |
|
157 | continue;
|
158 | }
|
159 | }
|
160 |
|
161 |
|
162 | if (cap = this.rules.hr.exec(src)) {
|
163 | src = src.substring(cap[0].length);
|
164 | this.tokens.push({
|
165 | type: 'hr'
|
166 | });
|
167 | continue;
|
168 | }
|
169 |
|
170 |
|
171 | if (cap = this.rules.blockquote.exec(src)) {
|
172 | src = src.substring(cap[0].length);
|
173 |
|
174 | this.tokens.push({
|
175 | type: 'blockquote_start'
|
176 | });
|
177 |
|
178 | cap = cap[0].replace(/^ *> ?/gm, '');
|
179 |
|
180 |
|
181 |
|
182 |
|
183 | this.token(cap, top);
|
184 |
|
185 | this.tokens.push({
|
186 | type: 'blockquote_end'
|
187 | });
|
188 |
|
189 | continue;
|
190 | }
|
191 |
|
192 |
|
193 | if (cap = this.rules.list.exec(src)) {
|
194 | src = src.substring(cap[0].length);
|
195 | bull = cap[2];
|
196 | isordered = bull.length > 1;
|
197 |
|
198 | listStart = {
|
199 | type: 'list_start',
|
200 | ordered: isordered,
|
201 | start: isordered ? +bull : '',
|
202 | loose: false
|
203 | };
|
204 |
|
205 | this.tokens.push(listStart);
|
206 |
|
207 |
|
208 | cap = cap[0].match(this.rules.item);
|
209 |
|
210 | listItems = [];
|
211 | next = false;
|
212 | l = cap.length;
|
213 | i = 0;
|
214 |
|
215 | for (; i < l; i++) {
|
216 | item = cap[i];
|
217 |
|
218 |
|
219 |
|
220 | space = item.length;
|
221 | item = item.replace(/^ *([*+-]|\d+\.) */, '');
|
222 |
|
223 |
|
224 |
|
225 | if (~item.indexOf('\n ')) {
|
226 | space -= item.length;
|
227 | item = !this.options.pedantic
|
228 | ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
|
229 | : item.replace(/^ {1,4}/gm, '');
|
230 | }
|
231 |
|
232 |
|
233 |
|
234 | if (i !== l - 1) {
|
235 | b = block.bullet.exec(cap[i + 1])[0];
|
236 | if (bull.length > 1 ? b.length === 1
|
237 | : (b.length > 1 || (this.options.smartLists && b !== bull))) {
|
238 | src = cap.slice(i + 1).join('\n') + src;
|
239 | i = l - 1;
|
240 | }
|
241 | }
|
242 |
|
243 |
|
244 |
|
245 |
|
246 | loose = next || /\n\n(?!\s*$)/.test(item);
|
247 | if (i !== l - 1) {
|
248 | next = item.charAt(item.length - 1) === '\n';
|
249 | if (!loose) loose = next;
|
250 | }
|
251 |
|
252 | if (loose) {
|
253 | listStart.loose = true;
|
254 | }
|
255 |
|
256 |
|
257 | istask = /^\[[ xX]\] /.test(item);
|
258 | ischecked = undefined;
|
259 | if (istask) {
|
260 | ischecked = item[1] !== ' ';
|
261 | item = item.replace(/^\[[ xX]\] +/, '');
|
262 | }
|
263 |
|
264 | t = {
|
265 | type: 'list_item_start',
|
266 | task: istask,
|
267 | checked: ischecked,
|
268 | loose: loose
|
269 | };
|
270 |
|
271 | listItems.push(t);
|
272 | this.tokens.push(t);
|
273 |
|
274 |
|
275 | this.token(item, false);
|
276 |
|
277 | this.tokens.push({
|
278 | type: 'list_item_end'
|
279 | });
|
280 | }
|
281 |
|
282 | if (listStart.loose) {
|
283 | l = listItems.length;
|
284 | i = 0;
|
285 | for (; i < l; i++) {
|
286 | listItems[i].loose = true;
|
287 | }
|
288 | }
|
289 |
|
290 | this.tokens.push({
|
291 | type: 'list_end'
|
292 | });
|
293 |
|
294 | continue;
|
295 | }
|
296 |
|
297 |
|
298 | if (cap = this.rules.html.exec(src)) {
|
299 | src = src.substring(cap[0].length);
|
300 | this.tokens.push({
|
301 | type: this.options.sanitize
|
302 | ? 'paragraph'
|
303 | : 'html',
|
304 | pre: !this.options.sanitizer
|
305 | && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
|
306 | text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
|
307 | });
|
308 | continue;
|
309 | }
|
310 |
|
311 |
|
312 | if (top && (cap = this.rules.def.exec(src))) {
|
313 | src = src.substring(cap[0].length);
|
314 | if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
|
315 | tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
316 | if (!this.tokens.links[tag]) {
|
317 | this.tokens.links[tag] = {
|
318 | href: cap[2],
|
319 | title: cap[3]
|
320 | };
|
321 | }
|
322 | continue;
|
323 | }
|
324 |
|
325 |
|
326 | if (cap = this.rules.table.exec(src)) {
|
327 | item = {
|
328 | type: 'table',
|
329 | header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
330 | align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
331 | cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
332 | };
|
333 |
|
334 | if (item.header.length === item.align.length) {
|
335 | src = src.substring(cap[0].length);
|
336 |
|
337 | for (i = 0; i < item.align.length; i++) {
|
338 | if (/^ *-+: *$/.test(item.align[i])) {
|
339 | item.align[i] = 'right';
|
340 | } else if (/^ *:-+: *$/.test(item.align[i])) {
|
341 | item.align[i] = 'center';
|
342 | } else if (/^ *:-+ *$/.test(item.align[i])) {
|
343 | item.align[i] = 'left';
|
344 | } else {
|
345 | item.align[i] = null;
|
346 | }
|
347 | }
|
348 |
|
349 | for (i = 0; i < item.cells.length; i++) {
|
350 | item.cells[i] = splitCells(
|
351 | item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
|
352 | item.header.length);
|
353 | }
|
354 |
|
355 | this.tokens.push(item);
|
356 |
|
357 | continue;
|
358 | }
|
359 | }
|
360 |
|
361 |
|
362 | if (cap = this.rules.lheading.exec(src)) {
|
363 | src = src.substring(cap[0].length);
|
364 | this.tokens.push({
|
365 | type: 'heading',
|
366 | depth: cap[2].charAt(0) === '=' ? 1 : 2,
|
367 | text: cap[1]
|
368 | });
|
369 | continue;
|
370 | }
|
371 |
|
372 |
|
373 | if (top && (cap = this.rules.paragraph.exec(src))) {
|
374 | src = src.substring(cap[0].length);
|
375 | this.tokens.push({
|
376 | type: 'paragraph',
|
377 | text: cap[1].charAt(cap[1].length - 1) === '\n'
|
378 | ? cap[1].slice(0, -1)
|
379 | : cap[1]
|
380 | });
|
381 | continue;
|
382 | }
|
383 |
|
384 |
|
385 | if (cap = this.rules.text.exec(src)) {
|
386 |
|
387 | src = src.substring(cap[0].length);
|
388 | this.tokens.push({
|
389 | type: 'text',
|
390 | text: cap[0]
|
391 | });
|
392 | continue;
|
393 | }
|
394 |
|
395 | if (src) {
|
396 | throw new Error('Infinite loop on byte: ' + src.charCodeAt(0));
|
397 | }
|
398 | }
|
399 |
|
400 | return this.tokens;
|
401 | };
|
402 | };
|