UNPKG

11.8 kBJavaScriptView Raw
1import {
2 noopTest,
3 edit,
4 merge
5} from './helpers.js';
6
7/**
8 * Block-Level Grammar
9 */
// Base block-level rule table. Several entries are templates: bare words such
// as `paragraph`, `bull`, `comment`, `tag`, `attribute`, `label` and `title`
// are placeholders that later statements in this file substitute via edit().
export const block = {
  newline: /^(?: *(?:\n|$))+/,
  code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
  fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
  // HTML block template, kept as a string until its placeholders are filled in.
  // The numbered alternatives follow the (1)-(7) labels used below.
  html: [
    '^ {0,3}(?:', // optional indentation
    '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)', // (1)
    '|comment[^\\n]*(\\n+|$)', // (2)
    '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)', // (3)
    '|<![A-Z][\\s\\S]*?(?:>\\n*|$)', // (4)
    '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)', // (5)
    '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)', // (6)
    '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)', // (7) open tag
    '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)', // (7) closing tag
    ')'
  ].join(''),
  def: /^ {0,3}\[(label)\]: *(?:\n *)?<?([^\s>]+)>?(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
  // No table rule in the base grammar; the GFM grammar supplies its own.
  table: noopTest,
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // Paragraph template: each grammar swaps the placeholders in the lookahead
  // for its own set of constructs that may interrupt a paragraph
  // (CommonMark rules versus the original markdown spec).
  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
  text: /^[^\n]+/
};
36
// Sub-patterns spliced into the link reference definition rule.
block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;

// Fill the `label` and `title` placeholders of the def template, in that order.
const defPlaceholders = [
  ['label', block._label],
  ['title', block._title]
];
block.def = defPlaceholders
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(block.def))
  .getRegex();
43
// A list marker: `*`, `+`, `-`, or 1-9 digits followed by `.` or `)`.
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;

const listItemStartRule = edit(/^( *)(bull) */).replace('bull', block.bullet);
block.listItemStart = listItemStartRule.getRegex();

// NOTE(review): the list template in `block.list` shows no `hr` or `def`
// placeholder, so the last two substitutions look like no-ops. They are kept
// unchanged here; confirm against edit() in helpers.js before removing them.
const listHrLookahead = '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))';
const listDefLookahead = `\\n+(?=${block.def.source})`;
block.list = edit(block.list)
  .replace(/bull/g, block.bullet)
  .replace('hr', listHrLookahead)
  .replace('def', listDefLookahead)
  .getRegex();
54
// Tag names substituted for the `tag` placeholder (HTML block alternative (6)),
// joined into a single regex alternation.
block._tag = [
  'address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body', 'caption',
  'center', 'col', 'colgroup', 'dd', 'details', 'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  'figure', 'footer', 'form', 'frame', 'frameset', 'h[1-6]', 'head', 'header', 'hr', 'html', 'iframe',
  'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav', 'noframes', 'ol', 'optgroup', 'option',
  'p', 'param', 'section', 'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr',
  'track', 'ul'
].join('|');

// HTML comment; the `$` alternative lets an unterminated comment run to end of input.
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;

// Fill in the HTML block template and compile it with the `i` flag.
const htmlPlaceholders = [
  ['comment', block._comment],
  ['tag', block._tag],
  ['attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/]
];
block.html = htmlPlaceholders
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(block.html, 'i'))
  .getRegex();
67
// Substitutions applied, in order, to the `_paragraph` template for the
// default grammar. Each entry is [placeholder, replacement].
const paragraphInterrupts = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['|lheading', ''], // setext headings don't interrupt commonmark paragraphs
  ['|table', ''],
  ['blockquote', ' {0,3}>'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)'],
  ['tag', block._tag] // pars can be interrupted by type (6) html blocks
];
block.paragraph = paragraphInterrupts
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(block._paragraph))
  .getRegex();

// The blockquote template embeds the finished paragraph rule.
const blockquoteRule = edit(block.blockquote).replace('paragraph', block.paragraph);
block.blockquote = blockquoteRule.getRegex();

/**
 * Normal Block Grammar
 */

block.normal = merge({}, block);
89
90/**
91 * GFM Block Grammar
92 */
93
// GFM table template: header row, alignment row, then body rows. A body row
// stops at a blank line or at any of the placeholder constructs in the
// negative lookahead.
const gfmTableTemplate = [
  '^ *([^\\n ].*\\|.*)\\n', // Header
  ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?', // Align
  '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
].join('');

block.gfm = merge({}, block.normal, {
  table: gfmTableTemplate
});

// Substitutions applied, in order, to the table template.
const gfmTableInterrupts = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['blockquote', ' {0,3}>'],
  ['code', ' {4}[^\\n]'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)'],
  ['tag', block._tag] // tables can be interrupted by type (6) html blocks
];
block.gfm.table = gfmTableInterrupts
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(block.gfm.table))
  .getRegex();
110
// GFM paragraph rule: same substitutions as the default paragraph rule, except
// that the `table` placeholder is filled with the GFM table rule instead of
// being removed.
const gfmParagraphInterrupts = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['|lheading', ''], // setext headings don't interrupt commonmark paragraphs
  ['table', block.gfm.table], // interrupt paragraphs with table
  ['blockquote', ' {0,3}>'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)'],
  ['tag', block._tag] // pars can be interrupted by type (6) html blocks
];
block.gfm.paragraph = gfmParagraphInterrupts
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(block._paragraph))
  .getRegex();
122/**
123 * Pedantic grammar (original John Gruber's loose markdown specification)
124 */
125
// Pedantic block grammar: the normal grammar with html, def, heading, fences
// and paragraph overridden.
block.pedantic = merge({}, block.normal, {
  // HTML block: a comment, a closed tag pair, or a single tag. Every `tag`
  // placeholder becomes a word that is not one of the listed inline tag names.
  html: edit(
    '^ *(?:comment *(?:\\n|\\s*$)'
    + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
    + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
    .replace('comment', block._comment)
    .replace(/tag/g, '(?!(?:'
      + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
      + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
      + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
    .getRegex(),
  def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
  heading: /^(#{1,6})(.*)(?:\n+|$)/,
  fences: noopTest, // fences not supported
  // Paragraph rule built from the shared template. hr, heading, lheading and
  // blockquote are filled in; table, fences, list and html are removed.
  paragraph: edit(block.normal._paragraph)
    .replace('hr', block.hr)
    .replace('heading', ' *#{1,6} *[^\n]')
    .replace('lheading', block.lheading)
    // Drop the `table` placeholder, as the default paragraph rule does.
    // Assuming edit() does plain textual substitution, leaving it in place
    // keeps the literal word "table" in the negative lookahead, so a
    // continuation line starting with "table" would end the paragraph.
    .replace('|table', '')
    .replace('blockquote', ' {0,3}>')
    .replace('|fences', '')
    .replace('|list', '')
    .replace('|html', '')
    .getRegex()
});
150
151/**
152 * Inline-Level Grammar
153 */
// Base inline-level rule table. Bare words such as `scheme`, `email`,
// `comment`, `attribute`, `label`, `href`, `title`, `ref`, `punct` and
// `punctuation` are placeholders substituted later in this file via edit().
export const inline = {
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  url: noopTest,
  // Inline HTML template, kept as a string until its placeholders are filled in.
  tag: [
    '^comment',
    '|^</[a-zA-Z][\\w:-]*\\s*>', // closing tag
    '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>', // open tag
    '|^<\\?[\\s\\S]*?\\?>', // processing instruction, e.g. <?php ?>
    '|^<![a-zA-Z]+\\s[\\s\\S]*?>', // declaration, e.g. <!DOCTYPE html>
    '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' // CDATA section
  ].join(''),
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  reflink: /^!?\[(label)\]\[(ref)\]/,
  nolink: /^!?\[(ref)\](?:\[\])?/,
  reflinkSearch: 'reflink|nolink(?!\\()',
  emStrong: {
    lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
    // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
    // () Skip orphan delim inside strong (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
    rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
    rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
  },
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  br: /^( {2,}|\\)\n(?!\s*$)/,
  del: noopTest,
  text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
  punctuation: /^([\spunctuation])/
};
181
// Punctuation marks from the CommonMark spec, minus `*` and `_`, which are
// handled separately as emphasis markers. Written as character-class content.
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';

const punctuationRule = edit(inline.punctuation)
  .replace(/punctuation/g, inline._punctuation);
inline.punctuation = punctuationRule.getRegex();

// Spans that emphasis should skip over: [title](link), `code`, <html>.
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
// Backslash-escaped `*` and `_`.
inline.escapedEmSt = /\\\*|\\_/g;

// Inline comments reuse the block comment pattern but require the closing `-->`.
const inlineCommentRule = edit(block._comment)
  .replace('(?:-->|$)', '-->');
inline._comment = inlineCommentRule.getRegex();
192
// Fill the `punct` placeholder in each emphasis delimiter rule. The two
// right-delimiter rules are compiled with the `g` flag; the left one is not.
const emStrongBuilders = {
  lDelim: edit(inline.emStrong.lDelim),
  rDelimAst: edit(inline.emStrong.rDelimAst, 'g'),
  rDelimUnd: edit(inline.emStrong.rDelimUnd, 'g')
};
for (const [ruleName, builder] of Object.entries(emStrongBuilders)) {
  inline.emStrong[ruleName] = builder
    .replace(/punct/g, inline._punctuation)
    .getRegex();
}
204
// Global form of the backslash-escape pattern (same character set as `inline.escape`).
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

// Sub-patterns for the two autolink forms: `<scheme:...>` and `<email>`.
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

const autolinkPlaceholders = [
  ['scheme', inline._scheme],
  ['email', inline._email]
];
inline.autolink = autolinkPlaceholders
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(inline.autolink))
  .getRegex();
213
// One HTML attribute: a name, optionally followed by `=` and a double-quoted,
// single-quoted or unquoted value.
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

// Fill the inline HTML template with the comment and attribute sub-patterns.
const inlineTagPlaceholders = [
  ['comment', inline._comment],
  ['attribute', inline._attribute]
];
inline.tag = inlineTagPlaceholders
  .reduce((rule, [placeholder, pattern]) => rule.replace(placeholder, pattern), edit(inline.tag))
  .getRegex();
220
// Sub-patterns for link text, destination and title.
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

// Inline link: [label](href "title")
const linkRule = edit(inline.link)
  .replace('label', inline._label)
  .replace('href', inline._href)
  .replace('title', inline._title);
inline.link = linkRule.getRegex();

// Reference link: [label][ref]; the ref part reuses the block-level label pattern.
const reflinkRule = edit(inline.reflink)
  .replace('label', inline._label)
  .replace('ref', block._label);
inline.reflink = reflinkRule.getRegex();

// Shortcut or collapsed reference: [ref] or [ref][]
const nolinkRule = edit(inline.nolink)
  .replace('ref', block._label);
inline.nolink = nolinkRule.getRegex();

// Global search rule combining both reference forms.
const reflinkSearchRule = edit(inline.reflinkSearch, 'g')
  .replace('reflink', inline.reflink)
  .replace('nolink', inline.nolink);
inline.reflinkSearch = reflinkSearchRule.getRegex();
244
245/**
246 * Normal Inline Grammar
247 */
248
inline.normal = merge({}, inline);

/**
 * Pedantic Inline Grammar
 */

// Rules the pedantic grammar adds to or overrides in the normal inline grammar.
const pedanticInlineOverrides = {
  strong: {
    start: /^__|\*\*/,
    middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
    endAst: /\*\*(?!\*)/g,
    endUnd: /__(?!_)/g
  },
  em: {
    start: /^_|\*/,
    middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
    endAst: /\*(?!\*)/g,
    endUnd: /_(?!_)/g
  },
  // Link destination is captured loosely as everything up to the next `)`.
  link: edit(/^!?\[(label)\]\((.*?)\)/)
    .replace('label', inline._label)
    .getRegex(),
  // Allows whitespace between the two bracket pairs.
  reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
    .replace('label', inline._label)
    .getRegex()
};

inline.pedantic = merge({}, inline.normal, pedanticInlineOverrides);
275
276/**
277 * GFM Inline Grammar
278 */
279
// Rules the GFM grammar adds to or overrides in the normal inline grammar.
const gfmInlineOverrides = {
  escape: edit(inline.escape).replace('])', '~|])').getRegex(),
  _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
  // Bare URL template; the `email` placeholder is filled in below.
  url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
  _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
  del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
  text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
};

inline.gfm = merge({}, inline.normal, gfmInlineOverrides);

// Compile the URL rule case-insensitively with the extended email pattern spliced in.
const gfmUrlRule = edit(inline.gfm.url, 'i')
  .replace('email', inline.gfm._extended_email);
inline.gfm.url = gfmUrlRule.getRegex();
292/**
293 * GFM + Line Breaks Inline Grammar
294 */
295
// Line-break variant of the GFM rules: the `{2,}` quantifiers are rewritten
// to `*` in both the br and text rules.
const breaksBr = edit(inline.br)
  .replace('{2,}', '*')
  .getRegex();

const breaksText = edit(inline.gfm.text)
  .replace('\\b_', '\\b_| {2,}\\n')
  .replace(/\{2,\}/g, '*')
  .getRegex();

inline.breaks = merge({}, inline.gfm, {
  br: breaksBr,
  text: breaksText
});