UNPKG

9.14 kBJavaScriptView Raw
/**
 * Not type-checking this file because it's mostly vendor code.
 */

/*!
 * HTML Parser By John Resig (ejohn.org)
 * Modified by Juriy "kangax" Zaytsev
 * Original code by Erik Arvidsson, Mozilla Public License
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 */
11
12import { makeMap, no } from 'shared/util'
13import { isNonPhrasingTag } from 'web/compiler/util'
14import { unicodeLetters } from 'core/util/lang'
15
// Regular Expressions for parsing tags and attributes
//
// `attribute` and `dynamicArgAttribute` share the same capture layout:
//   [1] attribute name
//   [2] the `=` sign (absent for value-less attributes)
//   [3] double-quoted value, [4] single-quoted value, [5] unquoted value
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
// Same as `attribute`, but the name starts with `v-xxx:`, `@`, `:` or `#`
// followed by a bracketed argument, e.g. `:[key]="value"`.
const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
// Tag-name fragment: a letter or underscore, then letters, digits, `-`, `.`, `_`
// or any character listed in `unicodeLetters`.
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeLetters}]*`
// Captures an optionally prefixed name (`prefix:name`) as a single group.
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
// `<tagName` at the start of the input; [1] is the tag name.
const startTagOpen = new RegExp(`^<${qnameCapture}`)
// End of a start tag; [1] is `/` for self-closing tags, otherwise empty.
const startTagClose = /^\s*(\/?)>/
// `</tagName ...>` at the start of the input; [1] is the tag name.
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being parsed as HTML comment when inlined in page
const comment = /^<!\--/
const conditionalComment = /^<!\[/
28
// Special Elements (can contain anything)
export const isPlainTextElement = makeMap('script,style,textarea', true)
// Cache of the regexes parseHTML builds to find the closing tag of a
// plain-text element, keyed by the lower-cased tag name.
const reCache = {}
32
// Maps each HTML entity recognised in attribute values to the character it
// stands for.
const decodingMap = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&amp;': '&',
  '&#10;': '\n',
  '&#9;': '\t',
  '&#39;': "'"
}
// Entities that are always decoded in attribute values.
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
// Same set plus the encoded newline (&#10;) and tab (&#9;).
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g
44
// #5992
// Tags for which a newline directly after the start tag is dropped.
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
// Truthy when `tag` is one of the tags above and `html` begins with '\n'.
const shouldIgnoreFirstNewline = (tag, html) => tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
48
/**
 * Replace the HTML entities listed in `decodingMap` inside an attribute value.
 *
 * @param {string} value - raw attribute value as it appears in the markup
 * @param {boolean} shouldDecodeNewlines - when truthy, `&#10;` and `&#9;` are
 *   decoded as well; otherwise they are left untouched
 * @returns {string} the value with the matched entities decoded
 */
function decodeAttr (value, shouldDecodeNewlines) {
  const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  return value.replace(re, match => decodingMap[match])
}
53
/**
 * Walk through an HTML/template string from left to right and report what is
 * found through the callbacks supplied in `options`.
 *
 * Options read by this function:
 *   - expectHTML: enables the implicit closing of <p> and of
 *     "can be left open" tags when a new start tag is seen
 *   - isUnaryTag(tagName), canBeLeftOpenTag(tagName): predicates, default to `no`
 *   - shouldKeepComment: when truthy, comments are reported via `comment`
 *   - shouldDecodeNewlines / shouldDecodeNewlinesForHref: forwarded to
 *     decodeAttr (the latter for the `href` attribute of <a>)
 *   - outputSourceRange: outside production, adds `start`/`end` offsets to
 *     every reported attribute
 *   - start(tagName, attrs, unary, start, end)
 *   - end(tagName, start, end)
 *   - chars(text, start, end)
 *   - comment(text, start, end)
 *   - warn(message, range): only called outside production
 *
 * @param {string} html - the markup to parse
 * @param {Object} options - callbacks and flags described above
 */
export function parseHTML (html, options) {
  // Currently open (non-unary) elements, innermost last.
  const stack = []
  const expectHTML = options.expectHTML
  const isUnaryTag = options.isUnaryTag || no
  const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  // Offset of the start of the remaining `html` within the original input.
  let index = 0
  let last, lastTag
  while (html) {
    last = html
    // Make sure we're not in a plaintext content element like script/style
    if (!lastTag || !isPlainTextElement(lastTag)) {
      let textEnd = html.indexOf('<')
      if (textEnd === 0) {
        // Comment:
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')

          if (commentEnd >= 0) {
            // Guard the callback like every other handler in this function.
            if (options.shouldKeepComment && options.comment) {
              options.comment(html.substring(4, commentEnd), index, index + commentEnd + 3)
            }
            advance(commentEnd + 3)
            continue
          }
        }

        // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')

          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }

        // Doctype:
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }

        // End tag:
        const endTagMatch = html.match(endTag)
        if (endTagMatch) {
          const curIndex = index
          advance(endTagMatch[0].length)
          parseEndTag(endTagMatch[1], curIndex, index)
          continue
        }

        // Start tag:
        const startTagMatch = parseStartTag()
        if (startTagMatch) {
          handleStartTag(startTagMatch)
          if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
            advance(1)
          }
          continue
        }
      }

      let text, rest, next
      if (textEnd >= 0) {
        rest = html.slice(textEnd)
        while (
          !endTag.test(rest) &&
          !startTagOpen.test(rest) &&
          !comment.test(rest) &&
          !conditionalComment.test(rest)
        ) {
          // < in plain text, be forgiving and treat it as text
          next = rest.indexOf('<', 1)
          if (next < 0) break
          textEnd += next
          rest = html.slice(textEnd)
        }
        text = html.substring(0, textEnd)
      }

      // No `<` at all: everything that is left is text.
      if (textEnd < 0) {
        text = html
      }

      if (text) {
        advance(text.length)
      }

      if (options.chars && text) {
        options.chars(text, index - text.length, index)
      }
    } else {
      // Inside a plain-text element: consume everything up to and including
      // its closing tag in one step.
      let endTagLength = 0
      const stackedTag = lastTag.toLowerCase()
      const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'))
      const rest = html.replace(reStackedTag, function (all, text, endTag) {
        endTagLength = endTag.length
        if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
          text = text
            .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
            .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
        }
        if (shouldIgnoreFirstNewline(stackedTag, text)) {
          text = text.slice(1)
        }
        if (options.chars) {
          options.chars(text)
        }
        return ''
      })
      index += html.length - rest.length
      html = rest
      parseEndTag(stackedTag, index - endTagLength, index)
    }

    // Nothing was consumed in this iteration: report the remainder as text
    // and stop, otherwise the loop would never terminate.
    if (html === last) {
      options.chars && options.chars(html)
      if (process.env.NODE_ENV !== 'production' && !stack.length && options.warn) {
        options.warn(`Mal-formatted tag at end of template: "${html}"`, { start: index + html.length })
      }
      break
    }
  }

  // Clean up any remaining tags
  parseEndTag()

  // Drop the first `n` characters of the remaining input and move `index` along.
  function advance (n) {
    index += n
    html = html.substring(n)
  }

  // Try to consume a complete start tag at the current position.
  // Returns { tagName, attrs, start, unarySlash, end } when the tag is properly
  // closed with `>` or `/>`, otherwise undefined. Note that the input consumed
  // so far is not restored when no closing `>` is found.
  function parseStartTag () {
    const start = html.match(startTagOpen)
    if (start) {
      const match = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      while (!(end = html.match(startTagClose)) && (attr = html.match(dynamicArgAttribute) || html.match(attribute))) {
        // Record where the raw match (including leading whitespace) sits in
        // the original input.
        attr.start = index
        advance(attr[0].length)
        attr.end = index
        match.attrs.push(attr)
      }
      if (end) {
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }

  // Turn a parseStartTag() result into a `start` callback, maintaining the
  // stack of open elements.
  function handleStartTag (match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash

    if (expectHTML) {
      // A non-phrasing element implicitly closes an open <p>.
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      // A repeated "can be left open" tag closes its predecessor.
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }

    const unary = isUnaryTag(tagName) || !!unarySlash

    const l = match.attrs.length
    const attrs = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      // [3] double-quoted, [4] single-quoted, [5] unquoted value.
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href'
        ? options.shouldDecodeNewlinesForHref
        : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
      if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
        // Skip the leading whitespace captured by the attribute regex.
        // `match()` returns an array, so the whitespace length is the length
        // of its first element, not of the array itself (which is always 1).
        attrs[i].start = args.start + args[0].match(/^\s*/)[0].length
        attrs[i].end = args.end
      }
    }

    if (!unary) {
      stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end })
      lastTag = tagName
    }

    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  // Close `tagName` together with every element opened after it. Called
  // without arguments it closes everything that is still open.
  function parseEndTag (tagName, start, end) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index

    // Find the closest opened tag of the same type
    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        if (process.env.NODE_ENV !== 'production' &&
          (i > pos || !tagName) &&
          options.warn
        ) {
          options.warn(
            `tag <${stack[i].tag}> has no matching end tag.`,
            { start: stack[i].start }
          )
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }

      // Remove the open elements from the stack
      stack.length = pos
      // Falsy (0) when the stack is now empty.
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      // A stray </br> is reported as a unary <br> start tag.
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      // A stray </p> is reported as an empty <p></p> pair.
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }
}