UNPKG

10.3 kBJavaScriptView Raw
1const {
2 noopTest,
3 edit,
4 merge
5} = require('./helpers.js');
6
/**
 * Block-Level Grammar
 *
 * Base table of block rules. An entry is either a finished regular expression
 * or a template (regex or string) that still contains bare-word placeholders
 * (`bull`, `hr`, `def`, `paragraph`, `comment`, `tag`, `attribute`, `label`,
 * `title`). The statements that follow this object substitute those
 * placeholders through `edit()` and overwrite the corresponding property.
 */
const block = {
  // One or more newlines at the start of the input.
  newline: /^\n+/,
  // Indented code: consecutive lines that each start with four spaces.
  code: /^( {4}[^\n]+\n*)+/,
  // Fenced code. Group 1: the opening fence (3+ backticks whose info string
  // contains no backtick, or 3+ tildes), group 2: info string, group 3: body.
  // The block ends at a line starting with the same fence text or at end of input.
  fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
  // Thematic break: three or more `-`, `_` or `*`, optionally separated by spaces.
  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  // ATX heading. Group 1: one to six `#`, group 2: heading text; a trailing
  // run of `#` preceded by spaces is not part of the text.
  heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/,
  // Template: one or more `>`-prefixed lines. `paragraph` is a placeholder.
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  // Template: group 1 is the leading indentation, group 2 the first bullet.
  // `bull`, `hr` and `def` are placeholders.
  list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
  // Template (string) for raw HTML blocks; `comment`, `tag` and `attribute`
  // are placeholders. The numbered alternatives presumably follow the HTML
  // block kinds of the CommonMark spec (confirm against the spec).
  html: '^ {0,3}(?:' // optional indentation
    + '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1) script/pre/style up to its closing tag or end of input
    + '|comment[^\\n]*(\\n+|$)' // (2) comment plus the rest of its line
    + '|<\\?[\\s\\S]*?\\?>\\n*' // (3) <? ... ?>
    + '|<![A-Z][\\s\\S]*?>\\n*' // (4) <!X ... > where X is a letter
    + '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>\\n*' // (5) CDATA section
    + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)' // (6) open/close tag from the `tag` list, up to a blank line
    + '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag
    + '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag
    + ')',
  // Template: link reference definition `[label]: destination title`.
  // Group 1: label, group 2: destination, group 3: optional title.
  def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
  // Table rules are stubbed with `noopTest` here; `block.gfm` supplies real patterns.
  nptable: noopTest,
  table: noopTest,
  // Setext heading: a line of text (group 1) underlined by `=` or `-` (group 2).
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // regex template, placeholders will be replaced according to different paragraph
  // interruption rules of commonmark and the original markdown spec:
  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/,
  // Everything up to the end of the current line.
  text: /^[^\n]+/
};
37
// Sub-patterns for the label and the optional title of a link reference definition.
block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;

// Fill the `label` and `title` placeholders of the definition template, in that order.
block.def = [
  ['label', block._label],
  ['title', block._title]
].reduce(
  (builder, [placeholder, pattern]) => builder.replace(placeholder, pattern),
  edit(block.def)
).getRegex();
44
// A list marker: `*`, `+`, `-`, or one to nine digits followed by a dot.
block.bullet = /(?:[*+-]|\d{1,9}\.)/;
{
  // A single list item: the marker line plus every following line that does
  // not itself start with a marker at the same indentation.
  const itemTemplate = /^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/;
  const itemBuilder = edit(itemTemplate, 'gm');
  block.item = itemBuilder.replace(/bull/g, block.bullet).getRegex();
}
50
{
  // Lookahead that ends a list in front of a thematic break.
  const beforeHr = '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))';
  // Lookahead that ends a list in front of a link reference definition.
  const beforeDef = `\\n+(?=${block.def.source})`;

  const listBuilder = edit(block.list)
    .replace(/bull/g, block.bullet)
    .replace('hr', beforeHr)
    .replace('def', beforeDef);
  block.list = listBuilder.getRegex();
}
56
// Element names used for the `tag` placeholder, joined into a regex alternation.
block._tag = [
  'address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body', 'caption',
  'center', 'col', 'colgroup', 'dd', 'details', 'dialog', 'dir', 'div', 'dl', 'dt',
  'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h[1-6]',
  'head', 'header', 'hr', 'html', 'iframe', 'legend', 'li', 'link', 'main', 'menu',
  'menuitem', 'meta', 'nav', 'noframes', 'ol', 'optgroup', 'option', 'p', 'param',
  'section', 'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
  'title', 'tr', 'track', 'ul'
].join('|');

// An HTML comment; the body may not start with `>` or `->`.
block._comment = /<!--(?!-?>)[\s\S]*?-->/;

{
  // One attribute inside an open tag: a name, optionally followed by a
  // double-quoted, single-quoted or unquoted value.
  const attribute = / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/;

  // Compile the HTML block template with the `i` flag after filling its placeholders.
  const htmlBuilder = edit(block.html, 'i')
    .replace('comment', block._comment)
    .replace('tag', block._tag)
    .replace('attribute', attribute);
  block.html = htmlBuilder.getRegex();
}
69
// Placeholder substitutions, applied in order, that turn the `_paragraph`
// template into the default paragraph rule.
block.paragraph = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['|lheading', ''], // setext headings don't interrupt commonmark paragraphs
  ['blockquote', ' {0,3}>'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)'],
  ['tag', block._tag] // pars can be interrupted by type (6) html blocks
].reduce(
  (builder, [placeholder, value]) => builder.replace(placeholder, value),
  edit(block._paragraph)
).getRegex();

// A blockquote line may hold a paragraph, so the finished paragraph rule is spliced in.
{
  const quoteBuilder = edit(block.blockquote);
  block.blockquote = quoteBuilder.replace('paragraph', block.paragraph).getRegex();
}
84
/**
 * Normal Block Grammar
 *
 * Built from the rules assigned to `block` so far; this statement runs
 * before the `gfm` and `pedantic` variants are attached to `block`.
 * NOTE(review): `merge` comes from ./helpers.js (not shown) and presumably
 * copies the properties of `block` into the fresh `{}` - confirm.
 */

block.normal = merge({}, block);
90
/**
 * GFM Block Grammar
 *
 * The normal grammar plus two table rules: `nptable` (rows without a leading
 * pipe) and `table` (rows starting with a pipe). Both start as string
 * templates whose placeholders name the constructs that end the cell rows.
 */

{
  const nptableTemplate = [
    '^ *([^|\\n ].*\\|.*)\\n', // Header
    ' *([-:]+ *\\|[-| :]*)', // Align
    '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  ].join('');

  const tableTemplate = [
    '^ *\\|(.+)\\n', // Header
    ' *\\|?( *[-:]+[-| :]*)', // Align
    '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  ].join('');

  block.gfm = merge({}, block.normal, {
    nptable: nptableTemplate,
    table: tableTemplate
  });

  // Both table templates take exactly the same substitutions, in this order.
  const interruptions = [
    ['hr', block.hr],
    ['heading', ' {0,3}#{1,6} '],
    ['blockquote', ' {0,3}>'],
    ['code', ' {4}[^\\n]'],
    ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
    ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
    ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)'],
    ['tag', block._tag] // tables can be interrupted by type (6) html blocks
  ];

  // Fill every placeholder of one template and compile it.
  const compileTable = (template) => interruptions
    .reduce((builder, [placeholder, value]) => builder.replace(placeholder, value), edit(template))
    .getRegex();

  block.gfm.nptable = compileTable(block.gfm.nptable);
  block.gfm.table = compileTable(block.gfm.table);
}
125
/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 *
 * The normal grammar with its own html, def, heading and paragraph rules and
 * with fenced code switched off.
 */

{
  // Names excluded from the `tag` placeholder of the pedantic html rule.
  const excludedNames = 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
    + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img';
  // A word that is none of the names above and is not followed by `:` or by
  // non-word, non-space characters leading up to an `@`.
  const tagName = `(?!(?:${excludedNames})\\b)\\w+(?!:|[^\\w\\s@]*@)\\b`;

  const htmlTemplate = [
    '^ *(?:comment *(?:\\n|\\s*$)',
    '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)', // closed tag
    '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))'
  ].join('');
  const pedanticHtml = edit(htmlTemplate)
    .replace('comment', block._comment)
    .replace(/tag/g, tagName)
    .getRegex();

  // Here the setext heading placeholder is filled in, while the fences, list
  // and html alternatives are removed from the template.
  const pedanticParagraph = [
    ['hr', block.hr],
    ['heading', ' *#{1,6} *[^\n]'],
    ['lheading', block.lheading],
    ['blockquote', ' {0,3}>'],
    ['|fences', ''],
    ['|list', ''],
    ['|html', '']
  ].reduce(
    (builder, [placeholder, value]) => builder.replace(placeholder, value),
    edit(block.normal._paragraph)
  ).getRegex();

  block.pedantic = merge({}, block.normal, {
    html: pedanticHtml,
    def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
    heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/,
    fences: noopTest, // fences not supported
    paragraph: pedanticParagraph
  });
}
154
/**
 * Inline-Level Grammar
 *
 * Base table of inline rules. As with the block table, some entries are
 * templates containing bare-word placeholders (`scheme`, `email`, `comment`,
 * `attribute`, `label`, `href`, `title`, `punctuation`) that later statements
 * fill in through `edit()`.
 */
const inline = {
  // A backslash followed by one ASCII punctuation character (group 1).
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  // Template: `<scheme:...>` or `<email>`; `scheme` and `email` are placeholders.
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  // Bare URLs are stubbed with `noopTest` here; `inline.gfm` supplies a real pattern.
  url: noopTest,
  // Template (string) for inline HTML; `comment` and `attribute` are placeholders.
  tag: '^comment'
    + '|^</[a-zA-Z][\\w:-]*\\s*>' // closing tag
    + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
    + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
    + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
    + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
  // Template: inline link or image `[label](href title)`; the leading `!` is optional.
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  // Template: reference link `[label][ref]` with a non-blank reference (group 2).
  reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
  // Shortcut reference `[text]`, optionally followed by an empty `[]`.
  nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
  // Strong emphasis with `__` or `**`; the first two alternatives cover a
  // single-character body, the last two a longer body with no whitespace at either end.
  strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
  // Template: emphasis with `_` or `*`; `punctuation` is a placeholder inside character classes.
  em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
  // Code span: a backtick run (group 1), content (group 2), then the same run
  // not followed by another backtick.
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  // Hard line break: two or more spaces, or a backslash, before a newline
  // that is not followed only by whitespace.
  br: /^( {2,}|\\)\n(?!\s*$)/,
  // Strikethrough is stubbed with `noopTest` here; `inline.gfm` supplies a real pattern.
  del: noopTest,
  // Plain text: lazily consumes characters until just before a character that
  // may start another inline rule, a hard line break, or end of input.
  text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/
};
178
// Punctuation characters taken from the CommonMark spec, minus ` and ],
// which are left out to work around Rule 17 (inline code blocks/links).
inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
{
  // Every `punctuation` placeholder in the emphasis template receives the same list.
  const emBuilder = edit(inline.em);
  inline.em = emBuilder
    .replace(/punctuation/g, inline._punctuation)
    .getRegex();
}
183
// Global variant of the escape pattern: a backslash plus one ASCII punctuation character.
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

// URI scheme: a letter followed by 1 to 31 letters, digits, `+`, `.` or `-`.
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
// E-mail address; group 1 captures the `@`.
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

// Fill the `scheme` and `email` placeholders of the autolink template, in that order.
inline.autolink = [
  ['scheme', inline._scheme],
  ['email', inline._email]
].reduce(
  (builder, [placeholder, pattern]) => builder.replace(placeholder, pattern),
  edit(inline.autolink)
).getRegex();
192
// One attribute of an inline open tag: whitespace, a name, and optionally a
// double-quoted, single-quoted or unquoted value.
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

{
  // The inline tag template reuses the block-level comment pattern.
  const tagBuilder = edit(inline.tag)
    .replace('comment', block._comment)
    .replace('attribute', inline._attribute);
  inline.tag = tagBuilder.getRegex();
}
199
// Link text: bracketed groups, backslash escapes, code spans or any other
// character except brackets, backslashes and backticks (matched lazily).
inline._label = /(?:\[[^\[\]]*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
// Link destination: either `<...>` or a run without whitespace or control characters.
inline._href = /<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/;
// Link title in double quotes, single quotes or parentheses.
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

// Inline links take all three sub-patterns.
inline.link = [
  ['label', inline._label],
  ['href', inline._href],
  ['title', inline._title]
].reduce(
  (builder, [placeholder, pattern]) => builder.replace(placeholder, pattern),
  edit(inline.link)
).getRegex();

// Reference links only have a `label` placeholder.
{
  const reflinkBuilder = edit(inline.reflink);
  inline.reflink = reflinkBuilder.replace('label', inline._label).getRegex();
}
213
/**
 * Normal Inline Grammar
 *
 * Built from the rules assigned to `inline` so far; this statement runs
 * before the `pedantic`, `gfm` and `breaks` variants are attached to `inline`.
 * NOTE(review): `merge` comes from ./helpers.js (not shown) and presumably
 * copies the properties of `inline` into the fresh `{}` - confirm.
 */

inline.normal = merge({}, inline);
219
/**
 * Pedantic Inline Grammar
 *
 * The normal inline grammar with different strong, em, link and reflink rules.
 */

{
  // Both link templates only need their `label` placeholder filled.
  const withLabel = (template) => edit(template)
    .replace('label', inline._label)
    .getRegex();

  const pedanticLink = withLabel(/^!?\[(label)\]\((.*?)\)/);
  const pedanticReflink = withLabel(/^!?\[(label)\]\s*\[([^\]]*)\]/);

  inline.pedantic = merge({}, inline.normal, {
    strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
    em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,
    link: pedanticLink,
    reflink: pedanticReflink
  });
}
234
/**
 * GFM Inline Grammar
 *
 * The normal inline grammar plus bare URL / e-mail autolinks, strikethrough
 * and a text rule that also stops in front of those constructs.
 */

{
  // Escape rule with `~|` inserted before the closing `])` of its character class.
  const gfmEscape = edit(inline.escape).replace('])', '~|])').getRegex();

  inline.gfm = merge({}, inline.normal, {
    escape: gfmEscape,
    _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
    url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
    _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
    del: /^~+(?=\S)([\s\S]*?\S)~+/,
    text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*~]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))|(?= {2,}\n|[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))/
  });

  // The url template is compiled with the `i` flag once its `email` placeholder is filled.
  const urlBuilder = edit(inline.gfm.url, 'i');
  inline.gfm.url = urlBuilder
    .replace('email', inline.gfm._extended_email)
    .getRegex();
}
/**
 * GFM + Line Breaks Inline Grammar
 *
 * The GFM inline grammar with `br` and `text` rewritten so that the `{2,}`
 * space quantifier becomes `*`.
 */

{
  const breaksBr = edit(inline.br).replace('{2,}', '*').getRegex();

  // First add ` {2,}\n` as an extra stop alternative after `\b_`, then relax
  // every `{2,}` quantifier (including the one just added) to `*`.
  const breaksText = edit(inline.gfm.text)
    .replace('\\b_', '\\b_| {2,}\\n')
    .replace(/\{2,\}/g, '*')
    .getRegex();

  inline.breaks = merge({}, inline.gfm, {
    br: breaksBr,
    text: breaksText
  });
}
262
// Public interface: the block-level and inline-level rule tables, each
// carrying its grammar variants as properties (e.g. `block.gfm`, `inline.breaks`).
module.exports = {
  block,
  inline
};