UNPKG

11.9 kBJavaScriptView Raw
1const {
2 noopTest,
3 edit,
4 merge
5} = require('./helpers.js');
6
/**
 * Block-level grammar.
 *
 * Several entries are templates rather than finished patterns: bare words
 * such as `bull`, `hr`, `def`, `label`, `title`, `comment`, `tag`,
 * `attribute` and `paragraph` are swapped for real sub-patterns by the
 * `edit(...)` calls further down this file.
 */
const block = {
  newline: /^(?: *(?:\n|$))+/,
  code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
  fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
  // The HTML block template is assembled from its numbered alternatives.
  html: [
    '^ {0,3}(?:', // optional indentation
    '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)', // (1)
    '|comment[^\\n]*(\\n+|$)', // (2)
    '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)', // (3)
    '|<![A-Z][\\s\\S]*?(?:>\\n*|$)', // (4)
    '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)', // (5)
    '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)', // (6)
    '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)', // (7) open tag
    '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)', // (7) closing tag
    ')'
  ].join(''),
  def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
  // Tables are stubbed out here; the GFM grammar below supplies real patterns.
  nptable: noopTest,
  table: noopTest,
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // Paragraph template: its placeholders are filled in differently for the
  // CommonMark-style and the original-markdown interruption rules.
  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html| +\n)[^\n]+)*)/,
  text: /^[^\n]+/
};
37
// Sub-patterns for the label and the optional title of a link reference
// definition.
block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;

// Fill the `label` and `title` placeholders of the definition template.
{
  let defBuilder = edit(block.def);
  defBuilder = defBuilder.replace('label', block._label);
  defBuilder = defBuilder.replace('title', block._title);
  block.def = defBuilder.getRegex();
}
44
// A list marker: `*`, `+`, `-`, or one to nine digits followed by `.` or `)`.
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;

// List item pattern: every `bull` in the template becomes the marker pattern
// and the result is compiled with the `g` and `m` flags.
block.item = edit(/^( *)(bull) ?[^\n]*(?:\n(?! *bull ?)[^\n]*)*/, 'gm')
  .replace(/bull/g, block.bullet)
  .getRegex();
50
// Leading spaces plus a list marker at the start of an item.
{
  const itemStartTemplate = /^( *)(bull) */;
  block.listItemStart = edit(itemStartTemplate)
    .replace('bull', block.bullet)
    .getRegex();
}
54
// Complete the list template: substitute the marker pattern and the two
// look-aheads (a thematic break, or a link reference definition) that can
// end a list.
{
  const hrAhead = '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))';
  const defAhead = `\\n+(?=${block.def.source})`;
  block.list = edit(block.list)
    .replace(/bull/g, block.bullet)
    .replace('hr', hrAhead)
    .replace('def', defAhead)
    .getRegex();
}
60
// Tag names substituted for the `tag` placeholder (HTML block alternative (6)),
// joined into a single alternation.
block._tag = [
  'address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body', 'caption',
  'center', 'col', 'colgroup', 'dd', 'details', 'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  'figure', 'footer', 'form', 'frame', 'frameset', 'h[1-6]', 'head', 'header', 'hr', 'html', 'iframe',
  'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav', 'noframes', 'ol', 'optgroup', 'option',
  'p', 'param', 'section', 'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr',
  'track', 'ul'
].join('|');

// An HTML comment; at block level it may also run to the end of the input.
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;

// Compile the HTML block template case-insensitively after filling in its
// `comment`, `tag` and `attribute` placeholders.
{
  const attributePattern = / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/;
  block.html = edit(block.html, 'i')
    .replace('comment', block._comment)
    .replace('tag', block._tag)
    .replace('attribute', attributePattern)
    .getRegex();
}
73
// Build the paragraph pattern by filling in, in order, the constructs that
// may interrupt a paragraph.
{
  const paragraphSubstitutions = [
    ['hr', block.hr],
    ['heading', ' {0,3}#{1,6} '],
    ['|lheading', ''], // setext headings don't interrupt commonmark paragraphs
    ['blockquote', ' {0,3}>'],
    ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
    ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
    ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)'],
    ['tag', block._tag] // pars can be interrupted by type (6) html blocks
  ];

  let paragraphBuilder = edit(block._paragraph);
  for (const [placeholder, pattern] of paragraphSubstitutions) {
    paragraphBuilder = paragraphBuilder.replace(placeholder, pattern);
  }
  block.paragraph = paragraphBuilder.getRegex();
}
84
// A blockquote line may hold a paragraph, so the finished paragraph pattern
// is spliced into the blockquote template.
{
  const quoteBuilder = edit(block.blockquote);
  block.blockquote = quoteBuilder
    .replace('paragraph', block.paragraph)
    .getRegex();
}
88
/**
 * Normal Block Grammar
 *
 * The rules defined so far, merged into a fresh object (presumably a shallow
 * copy; see `merge` in ./helpers.js).
 */
{
  const normalBlockRules = merge({}, block);
  block.normal = normalBlockRules;
}
94
/**
 * GFM Block Grammar
 *
 * The normal grammar plus the two table rules: `nptable` (header row without
 * a leading pipe) and `table` (header row starting with a pipe).
 */
{
  // Placeholder substitutions shared by both table templates: the constructs
  // that end the run of table body rows.
  const tableSubstitutions = [
    ['hr', block.hr],
    ['heading', ' {0,3}#{1,6} '],
    ['blockquote', ' {0,3}>'],
    ['code', ' {4}[^\\n]'],
    ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
    ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
    ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)'],
    ['tag', block._tag] // tables can be interrupted by type (6) html blocks
  ];

  // Apply every substitution, in order, to a table template and compile it.
  const compileTableRule = (template) => {
    let builder = edit(template);
    for (const [placeholder, pattern] of tableSubstitutions) {
      builder = builder.replace(placeholder, pattern);
    }
    return builder.getRegex();
  };

  const nptableTemplate = [
    '^ *([^|\\n ].*\\|.*)\\n', // Header
    ' {0,3}([-:]+ *\\|[-| :]*)', // Align
    '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  ].join('');

  const tableTemplate = [
    '^ *\\|(.+)\\n', // Header
    ' {0,3}\\|?( *[-:]+[-| :]*)', // Align
    '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  ].join('');

  block.gfm = merge({}, block.normal, {
    nptable: nptableTemplate,
    table: tableTemplate
  });

  block.gfm.nptable = compileTableRule(block.gfm.nptable);
  block.gfm.table = compileTableRule(block.gfm.table);
}
129
/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 *
 * Overrides `html`, `def`, `heading`, `fences` and `paragraph` on top of the
 * normal block grammar.
 */
{
  // Inline-level element names that must NOT start a pedantic HTML block.
  const inlineElementNames = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data', 'time', 'code', 'var', 'samp', 'kbd', 'sub',
    'sup', 'i', 'b', 'u', 'mark', 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del', 'img'
  ].join('|');

  // Substituted for every `tag` placeholder: a word that is none of the names
  // above and is not followed by `:` or by non-word, non-space characters
  // leading up to an `@`.
  const blockTagName = '(?!(?:' + inlineElementNames + ')\\b)\\w+(?!:|[^\\w\\s@]*@)\\b';

  const htmlTemplate = [
    '^ *(?:comment *(?:\\n|\\s*$)',
    '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)', // closed tag
    '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))'
  ].join('');

  const pedanticHtml = edit(htmlTemplate)
    .replace('comment', block._comment)
    .replace(/tag/g, blockTagName)
    .getRegex();

  // Unlike the normal grammar, setext headings stay in the interruption list
  // while fences, lists and html are removed from it.
  // Note: the heading replacement below uses a single backslash, so the
  // string carries a literal newline character inside the character class.
  const pedanticParagraph = edit(block.normal._paragraph)
    .replace('hr', block.hr)
    .replace('heading', ' *#{1,6} *[^\n]')
    .replace('lheading', block.lheading)
    .replace('blockquote', ' {0,3}>')
    .replace('|fences', '')
    .replace('|list', '')
    .replace('|html', '')
    .getRegex();

  block.pedantic = merge({}, block.normal, {
    html: pedanticHtml,
    def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
    heading: /^(#{1,6})(.*)(?:\n+|$)/,
    fences: noopTest, // fences not supported
    paragraph: pedanticParagraph
  });
}
158
/**
 * Inline-level grammar.
 *
 * As with the block grammar, bare words such as `scheme`, `email`, `comment`,
 * `attribute`, `label`, `href`, `title`, `punct` and `punctuation` are
 * placeholders that later `edit(...)` calls in this file replace.
 */
const inline = {
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  url: noopTest,
  // Inline HTML template, one alternative per kind of raw tag.
  tag: [
    '^comment',
    '|^</[a-zA-Z][\\w:-]*\\s*>', // closing tag
    '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>', // open tag
    '|^<\\?[\\s\\S]*?\\?>', // processing instruction, e.g. <?php ?>
    '|^<![a-zA-Z]+\\s[\\s\\S]*?>', // declaration, e.g. <!DOCTYPE html>
    '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' // CDATA section
  ].join(''),
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
  nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
  reflinkSearch: 'reflink|nolink(?!\\()',
  emStrong: {
    lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
    // Right-delimiter alternatives, after the leading "skip other delimiter" one:
    //   (1) #***   (2) a***#, a***   (3) #***a, ***a   (4) ***#   (5) #***#   (6) a***a
    // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left.
    // (5) and (6) can be either Left or Right.
    rDelimAst: /\_\_[^_]*?\*[^_]*?\_\_|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
    // Same layout for `_`, except that alternative (6) is not allowed for `_`.
    rDelimUnd: /\*\*[^*]*?\_[^*]*?\*\*|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/
  },
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  br: /^( {2,}|\\)\n(?!\s*$)/,
  del: noopTest,
  text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
  punctuation: /^([\spunctuation])/
};
189
// Punctuation marks from the CommonMark spec, minus `*` and `_`, which the
// emphasis rules add back individually per delimiter.
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';

{
  const punctuationBuilder = edit(inline.punctuation);
  inline.punctuation = punctuationBuilder
    .replace(/punctuation/g, inline._punctuation)
    .getRegex();
}
194
// Spans that emphasis matching should skip over: [title](link), `code`, <html>
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;

// Backslash-escaped `*` and `_`.
inline.escapedEmSt = /\\\*|\\_/g;

// Inline comments reuse the block comment pattern but require the closing
// `-->` instead of also accepting end of input.
{
  const commentBuilder = edit(block._comment);
  inline._comment = commentBuilder.replace('(?:-->|$)', '-->').getRegex();
}
200
// Expand the `punct` placeholder in each emphasis delimiter template. The
// two right-delimiter patterns are compiled with the `g` flag.
{
  const fillPunctuation = (builder) => builder
    .replace(/punct/g, inline._punctuation)
    .getRegex();

  inline.emStrong.lDelim = fillPunctuation(edit(inline.emStrong.lDelim));
  inline.emStrong.rDelimAst = fillPunctuation(edit(inline.emStrong.rDelimAst, 'g'));
  inline.emStrong.rDelimUnd = fillPunctuation(edit(inline.emStrong.rDelimUnd, 'g'));
}
212
// Global variant of the escape rule: any backslash-escaped punctuation mark.
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

// Pieces of the autolink rule: a URI scheme and an e-mail address.
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

{
  let autolinkBuilder = edit(inline.autolink);
  autolinkBuilder = autolinkBuilder.replace('scheme', inline._scheme);
  autolinkBuilder = autolinkBuilder.replace('email', inline._email);
  inline.autolink = autolinkBuilder.getRegex();
}
221
// One HTML attribute: whitespace, a name, and an optional value that is
// double-quoted, single-quoted or unquoted.
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

// Compile the inline tag template with its `comment` and `attribute`
// placeholders filled in.
{
  let tagBuilder = edit(inline.tag);
  tagBuilder = tagBuilder.replace('comment', inline._comment);
  tagBuilder = tagBuilder.replace('attribute', inline._attribute);
  inline.tag = tagBuilder.getRegex();
}
228
// Sub-patterns for the three parts of an inline link.
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

// [label](href "title")
{
  const linkParts = [
    ['label', inline._label],
    ['href', inline._href],
    ['title', inline._title]
  ];
  let linkBuilder = edit(inline.link);
  for (const [placeholder, pattern] of linkParts) {
    linkBuilder = linkBuilder.replace(placeholder, pattern);
  }
  inline.link = linkBuilder.getRegex();
}

// [label][ref]
{
  const reflinkBuilder = edit(inline.reflink);
  inline.reflink = reflinkBuilder.replace('label', inline._label).getRegex();
}

// Global search pattern built from the finished reflink rule and the nolink
// rule (the latter only when not followed by `(`).
{
  const searchBuilder = edit(inline.reflinkSearch, 'g');
  inline.reflinkSearch = searchBuilder
    .replace('reflink', inline.reflink)
    .replace('nolink', inline.nolink)
    .getRegex();
}
247
/**
 * Normal Inline Grammar
 *
 * The rules defined so far, merged into a fresh object (presumably a shallow
 * copy; see `merge` in ./helpers.js).
 */
{
  const normalInlineRules = merge({}, inline);
  inline.normal = normalInlineRules;
}
253
/**
 * Pedantic Inline Grammar
 *
 * Adds `strong` and `em` rule groups and replaces `link` and `reflink` on
 * top of the normal inline grammar.
 */
{
  const pedanticStrong = {
    start: /^__|\*\*/,
    middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
    endAst: /\*\*(?!\*)/g,
    endUnd: /__(?!_)/g
  };

  const pedanticEm = {
    start: /^_|\*/,
    middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
    endAst: /\*(?!\*)/g,
    endUnd: /_(?!_)/g
  };

  // Both link forms reuse the shared label pattern.
  const pedanticLink = edit(/^!?\[(label)\]\((.*?)\)/)
    .replace('label', inline._label)
    .getRegex();
  const pedanticReflink = edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
    .replace('label', inline._label)
    .getRegex();

  inline.pedantic = merge({}, inline.normal, {
    strong: pedanticStrong,
    em: pedanticEm,
    link: pedanticLink,
    reflink: pedanticReflink
  });
}
278
/**
 * GFM Inline Grammar
 *
 * Extends the normal inline grammar with bare URL / e-mail autolinks and
 * `~`-delimited deletions, and widens `escape` and `text` accordingly.
 */
{
  const gfmEscape = edit(inline.escape).replace('])', '~|])').getRegex();
  const extendedEmail = /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/;
  // Template: the `email` placeholder is filled in after the merge below.
  const urlTemplate = /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/;
  const backpedal = /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/;
  const strikethrough = /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/;
  const gfmText = /^([`~]+|[^`~])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))/;

  inline.gfm = merge({}, inline.normal, {
    escape: gfmEscape,
    _extended_email: extendedEmail,
    url: urlTemplate,
    _backpedal: backpedal,
    del: strikethrough,
    text: gfmText
  });

  // Finish the URL rule: case-insensitive, with the extended e-mail pattern.
  const urlBuilder = edit(inline.gfm.url, 'i');
  inline.gfm.url = urlBuilder
    .replace('email', inline.gfm._extended_email)
    .getRegex();
}
/**
 * GFM + Line Breaks Inline Grammar
 *
 * Same as the GFM inline grammar, except that the ` {2,}` quantifiers in
 * `br` and `text` are relaxed to ` *`.
 */
{
  const breaksBr = edit(inline.br)
    .replace('{2,}', '*')
    .getRegex();

  // First add a ` {2,}\n` alternative next to `\b_`, then relax every `{2,}`
  // in the pattern (including the one just added) to `*`.
  const breaksText = edit(inline.gfm.text)
    .replace('\\b_', '\\b_| {2,}\\n')
    .replace(/\{2,\}/g, '*')
    .getRegex();

  inline.breaks = merge({}, inline.gfm, {
    br: breaksBr,
    text: breaksText
  });
}
306
// Public surface of this module: the block-level and inline-level grammars.
module.exports = { block: block, inline: inline };