// Retrieved via UNPKG (raw view) - 11.1 kB, JavaScript
const {
  noopTest,
  edit,
  merge
} = require('./helpers.js');

/**
 * Block-Level Grammar
 *
 * Raw patterns for block-level constructs. Several entries are templates
 * rather than finished expressions: bare words such as `bull`, `paragraph`,
 * `label`, `title`, `comment`, `tag` and `attribute` are placeholders that
 * get substituted through `edit(...).replace(...)` further down in this file.
 */
const block = {
  newline: /^(?: *(?:\n|$))+/,
  code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
  fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
  // Kept as a string (not a regex literal) so it can be assembled from one
  // alternative per line; it is compiled later with the `i` flag.
  // The (1)-(7) markers presumably follow the CommonMark HTML block start
  // conditions - confirm against the spec.
  html: '^ {0,3}(?:' // optional indentation
    + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
    + '|comment[^\\n]*(\\n+|$)' // (2)
    + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
    + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
    + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
    + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
    + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
    + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
    + ')',
  def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
  // Only the GFM grammar below supplies a real table pattern.
  table: noopTest,
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // regex template, placeholders will be replaced according to different paragraph
  // interruption rules of commonmark and the original markdown spec:
  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html| +\n)[^\n]+)*)/,
  text: /^[^\n]+/
};
36
// Pieces of a link reference definition, spliced into the `def` template
// in place of its `label` and `title` placeholders.
block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
block.def = edit(block.def)
  .replace('label', block._label)
  .replace('title', block._title)
  .getRegex();

// List marker: `*`, `+`, `-`, or 1-9 digits followed by `.` or `)`.
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
block.listItemStart = edit(/^( *)(bull) */)
  .replace('bull', block.bullet)
  .getRegex();

// NOTE(review): the `list` template contains no `hr` or `def` placeholder,
// so the last two replacements below look like no-ops - confirm against
// edit() in helpers.js before removing them.
block.list = edit(block.list)
  .replace(/bull/g, block.bullet)
  .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
  .replace('def', '\\n+(?=' + block.def.source + ')')
  .getRegex();
54
// Tag names substituted for the `tag` placeholder of HTML alternative (6).
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
  + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
  + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
  + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
  + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
  + '|track|ul';
// HTML comment; an unterminated comment runs to the end of the input.
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
// Compile the HTML block template case-insensitively, filling in its
// `comment`, `tag` and `attribute` placeholders.
block.html = edit(block.html, 'i')
  .replace('comment', block._comment)
  .replace('tag', block._tag)
  .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
  .getRegex();
67
// Paragraph pattern: each placeholder inside the negative lookahead of
// `_paragraph` becomes the start of a construct that may interrupt a paragraph.
block.paragraph = edit(block._paragraph)
  .replace('hr', block.hr)
  .replace('heading', ' {0,3}#{1,6} ')
  .replace('|lheading', '') // setext headings don't interrupt commonmark paragraphs
  .replace('blockquote', ' {0,3}>')
  .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
  .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
  .getRegex();

// A blockquote line carries either a full paragraph or a single line of text.
block.blockquote = edit(block.blockquote)
  .replace('paragraph', block.paragraph)
  .getRegex();
82
/**
 * Normal Block Grammar
 */

block.normal = merge({}, block);

/**
 * GFM Block Grammar
 *
 * The normal grammar plus a table pattern. The pattern is first stored as a
 * string template and then compiled just below, with the placeholders in its
 * "Cells" lookahead replaced by the constructs that end a table.
 */

block.gfm = merge({}, block.normal, {
  table: '^ *([^\\n ].*\\|.*)\\n' // Header
    + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)\\|?' // Align
    + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
});

block.gfm.table = edit(block.gfm.table)
  .replace('hr', block.hr)
  .replace('heading', ' {0,3}#{1,6} ')
  .replace('blockquote', ' {0,3}>')
  .replace('code', ' {4}[^\\n]')
  .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
  .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
  .getRegex();
109
/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 *
 * Overrides `html`, `def`, `heading`, `fences` and `paragraph` of the normal
 * grammar; every other rule is inherited unchanged.
 */

block.pedantic = merge({}, block.normal, {
  // Every `tag` placeholder becomes "any word that is not one of the listed
  // inline element names".
  html: edit(
    '^ *(?:comment *(?:\\n|\\s*$)'
    + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
    + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
    .replace('comment', block._comment)
    .replace(/tag/g, '(?!(?:'
      + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
      + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
      + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
    .getRegex(),
  def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
  heading: /^(#{1,6})(.*)(?:\n+|$)/,
  fences: noopTest, // fences not supported
  // Unlike the normal paragraph rule, setext headings (`lheading`) stay in
  // the interruption list while fences, lists and html are dropped from it.
  paragraph: edit(block.normal._paragraph)
    .replace('hr', block.hr)
    .replace('heading', ' *#{1,6} *[^\n]')
    .replace('lheading', block.lheading)
    .replace('blockquote', ' {0,3}>')
    .replace('|fences', '')
    .replace('|list', '')
    .replace('|html', '')
    .getRegex()
});
138
/**
 * Inline-Level Grammar
 *
 * Raw patterns for inline constructs. As in the block grammar, bare words
 * such as `scheme`, `email`, `comment`, `attribute`, `label`, `href`, `title`,
 * `punct` and `punctuation` are placeholders that are filled in through
 * `edit(...).replace(...)` further down in this file.
 */
const inline = {
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  url: noopTest,
  tag: '^comment'
    + '|^</[a-zA-Z][\\w:-]*\\s*>' // closing tag
    + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
    + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
    + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
    + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
  nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
  reflinkSearch: 'reflink|nolink(?!\\()',
  emStrong: {
    lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
    // Alternatives of the rDelim patterns, in order (# = punctuation, a = other):
    //   ()  skip a run of the other delimiter
    //   (1) #***          - can only be a Right Delimiter
    //   (2) a***#, a***   - can only be a Right Delimiter
    //   (3) #***a, ***a   - can only be Left
    //   (4) ***#          - can only be Left
    //   (5) #***#         - can be either Left or Right
    //   (6) a***a         - can be either Left or Right
    rDelimAst: /\_\_[^_*]*?\*[^_*]*?\_\_|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
    // Same alternatives for `_`, except that case (6) is not allowed for _.
    rDelimUnd: /\*\*[^_*]*?\_[^_*]*?\*\*|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/
  },
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  br: /^( {2,}|\\)\n(?!\s*$)/,
  del: noopTest,
  text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
  punctuation: /^([\spunctuation])/
};
169
// list of punctuation marks from CommonMark spec
// without * and _ to handle the different emphasis markers * and _
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();

// sequences em should skip over [title](link), `code`, <html>
// (both patterns carry the `g` flag, so they keep state in `lastIndex`)
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
inline.escapedEmSt = /\\\*|\\_/g;

// Inline comments must be terminated: the block rule's "or end of input"
// alternative is narrowed to a literal `-->`.
inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();

// Fill the `punct` placeholder of the emphasis delimiter patterns; the two
// right-delimiter patterns are compiled with the `g` flag.
inline.emStrong.lDelim = edit(inline.emStrong.lDelim)
  .replace(/punct/g, inline._punctuation)
  .getRegex();

inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g')
  .replace(/punct/g, inline._punctuation)
  .getRegex();

inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g')
  .replace(/punct/g, inline._punctuation)
  .getRegex();

// Unanchored, global counterpart of `inline.escape`.
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
inline.autolink = edit(inline.autolink)
  .replace('scheme', inline._scheme)
  .replace('email', inline._email)
  .getRegex();

// Unlike the attribute pattern used for HTML blocks, this one accepts any
// whitespace (including newlines) around and inside an attribute.
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

inline.tag = edit(inline.tag)
  .replace('comment', inline._comment)
  .replace('attribute', inline._attribute)
  .getRegex();

// Building blocks for inline links: [label](href "title")
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

inline.link = edit(inline.link)
  .replace('label', inline._label)
  .replace('href', inline._href)
  .replace('title', inline._title)
  .getRegex();

inline.reflink = edit(inline.reflink)
  .replace('label', inline._label)
  .getRegex();

// Global search pattern combining reference links and shortcut links; a
// shortcut link immediately followed by `(` is excluded.
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
  .replace('reflink', inline.reflink)
  .replace('nolink', inline.nolink)
  .getRegex();
227
/**
 * Normal Inline Grammar
 */

inline.normal = merge({}, inline);

/**
 * Pedantic Inline Grammar
 *
 * Adds `strong` and `em` rule groups and replaces `link` and `reflink` with
 * looser variants; everything else is inherited from the normal grammar.
 */

inline.pedantic = merge({}, inline.normal, {
  // NOTE(review): in both `start` patterns `^` binds only to the first
  // alternative, so the `*` alternative can match anywhere in the input -
  // confirm this is what the consumer of these rules expects.
  strong: {
    start: /^__|\*\*/,
    middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
    endAst: /\*\*(?!\*)/g,
    endUnd: /__(?!_)/g
  },
  em: {
    start: /^_|\*/,
    middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
    endAst: /\*(?!\*)/g,
    endUnd: /_(?!_)/g
  },
  // Everything between the parentheses is captured as one group.
  link: edit(/^!?\[(label)\]\((.*?)\)/)
    .replace('label', inline._label)
    .getRegex(),
  // Whitespace is allowed between the two bracket pairs.
  reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
    .replace('label', inline._label)
    .getRegex()
});
258
/**
 * GFM Inline Grammar
 *
 * Extends the normal grammar with bare URL / email autolinks (`url`),
 * `~`-delimited `del`, and a `text` rule that additionally stops before `~`,
 * URL prefixes and email-like runs.
 */

inline.gfm = merge({}, inline.normal, {
  escape: edit(inline.escape).replace('])', '~|])').getRegex(),
  _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
  // `email` is a placeholder, filled in right after this object is built.
  url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
  _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
  del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
  text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
});

// Compiled case-insensitively.
inline.gfm.url = edit(inline.gfm.url, 'i')
  .replace('email', inline.gfm._extended_email)
  .getRegex();

/**
 * GFM + Line Breaks Inline Grammar
 *
 * Derived from the GFM grammar by turning every `{2,}` quantifier in `br`
 * and `text` into `*`, so a newline no longer needs two preceding spaces
 * to match.
 */

inline.breaks = merge({}, inline.gfm, {
  br: edit(inline.br).replace('{2,}', '*').getRegex(),
  text: edit(inline.gfm.text)
    .replace('\\b_', '\\b_| {2,}\\n')
    .replace(/\{2,\}/g, '*')
    .getRegex()
});
286
287module.exports = {
288 block,
289 inline
290};