1 |
|
2 | ;(function (sax) {
|
3 |
|
// Public API surface of the module.
// `sax.parser()` is a factory so callers can obtain a parser without `new`;
// the constructors themselves and the stream factory are exported as well.
sax.parser = function (strict, opt) {
  var created = new SAXParser(strict, opt);
  return created;
};
sax.SAXParser = SAXParser;
sax.SAXStream = SAXStream;
sax.createStream = createStream;
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
// Length threshold (in characters) used by checkBufferLength() to decide
// when an accumulating buffer must be flushed or reported as an error.
sax.MAX_BUFFER_LENGTH = 64 * 1024;

// Names of the parser properties that accumulate text while parsing.
// clearBuffers() resets each of them and checkBufferLength() inspects them.
var buffers = (
  "comment sgmlDecl textNode tagName doctype " +
  "procInstName procInstBody entity attribName " +
  "attribValue cdata script"
).split(" ");
|
25 |
|
// Names of every event the parser can emit. The parser looks up a handler
// called "on" + name; SAXStream re-emits all of them except "error"/"end",
// which it wires up explicitly.
sax.EVENTS = [
  "text",
  "processinginstruction",
  "sgmldeclaration",
  "doctype",
  "comment",
  "attribute",
  "opentag",
  "closetag",
  "opencdata",
  "cdata",
  "closecdata",
  "error",
  "end",
  "ready",
  "script",
  "opennamespace",
  "closenamespace"
];
|
45 |
|
/**
 * Parser constructor. May be called with or without `new`.
 *
 * It is also re-invoked on an existing parser by end() to reset all state,
 * so it must (re)initialise every field it owns.
 *
 * @param {boolean} strict  truthy to enable strict mode (also forces noscript)
 * @param {Object} [opt]    options; the fields read here are `lowercase`,
 *                          `lowercasetags`, `noscript`, `xmlns` and `position`.
 *                          Note that `opt.lowercase` is written back onto the
 *                          caller's object.
 */
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt);
  }

  var self = this;
  var options = opt || {};

  clearBuffers(self);
  self.c = "";
  self.q = "";
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;

  self.opt = options;
  // `lowercasetags` is accepted as an alias of `lowercase`.
  options.lowercase = options.lowercase || options.lowercasetags;
  // Name of the String method applied to tag/attribute names in loose mode.
  self.looseCase = options.lowercase ? "toLowerCase" : "toUpperCase";

  self.tags = [];
  self.sawRoot = false;
  self.closedRoot = false;
  self.closed = false;
  self.error = null;
  self.tag = null;
  self.strict = !!strict;
  self.noscript = !!(strict || options.noscript);
  self.state = S.BEGIN;
  // Per-parser entity table that inherits the shared defaults.
  self.ENTITIES = Object.create(sax.ENTITIES);
  self.attribList = [];

  // With namespaces enabled the parser itself acts as the outermost scope.
  if (options.xmlns) {
    self.ns = Object.create(rootNS);
  }

  // Position tracking is on unless explicitly disabled with `position: false`.
  self.trackPosition = options.position !== false;
  if (self.trackPosition) {
    self.column = 0;
    self.line = 0;
    self.position = 0;
  }

  emit(self, "onready");
}
|
77 |
|
// Fallback for engines that lack Object.create. Only the prototype argument
// is honoured; a property-descriptor second argument is ignored by this shim.
if (!Object.create) {
  Object.create = function (proto) {
    function Shim () {
      this.__proto__ = proto;
    }
    Shim.prototype = proto;
    return new Shim();
  };
}
|
83 |
|
// Fallback for engines that lack Object.getPrototypeOf; reads the
// non-standard __proto__ link instead.
if (!Object.getPrototypeOf) {
  Object.getPrototypeOf = function (target) {
    var proto = target.__proto__;
    return proto;
  };
}
|
87 |
|
// Fallback for engines that lack Object.keys: collects the object's own
// enumerable property names.
// hasOwnProperty is borrowed from Object.prototype so the shim also works on
// objects that shadow that method or do not inherit it at all.
if (!Object.keys) Object.keys = function (o) {
  var a = []
  var hasOwn = Object.prototype.hasOwnProperty
  for (var i in o) if (hasOwn.call(o, i)) a.push(i)
  return a
}
|
93 |
|
/**
 * Inspects every accumulation buffer and deals with those that have grown
 * past the allowed maximum, then schedules the next check.
 *
 * - textNode, cdata and script are flushed to their handlers and emptied.
 * - Any other oversized buffer raises a "Max buffer length exceeded" error.
 *
 * Finally parser.bufferCheckPosition is set to the earliest position at
 * which the largest remaining buffer could reach sax.MAX_BUFFER_LENGTH.
 *
 * @param {SAXParser} parser
 */
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    , maxActual = 0

  // Buffers are normally strings, but parser.doctype is set to `true` once
  // a doctype has been emitted. Treat anything that is not a string as
  // empty: otherwise `.length` is undefined, Math.max() returns NaN, and
  // bufferCheckPosition becomes NaN, which disables every later check.
  function lengthOf (value) {
    return typeof value === "string" ? value.length : 0
  }

  for (var i = 0, l = buffers.length; i < l; i ++) {
    var name = buffers[i]
      , len = lengthOf(parser[name])
    if (len > maxAllowed) {
      switch (name) {
        case "textNode":
          closeText(parser)
          break

        case "cdata":
          emitNode(parser, "oncdata", parser.cdata)
          parser.cdata = ""
          break

        case "script":
          emitNode(parser, "onscript", parser.script)
          parser.script = ""
          break

        default:
          error(parser, "Max buffer length exceeded: "+name)
      }
      // Re-measure: a flushed buffer is empty now and must not count
      // towards the next check position.
      len = lengthOf(parser[name])
    }
    maxActual = Math.max(maxActual, len)
  }

  parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual)
                             + parser.position
}
|
129 |
|
/**
 * Resets every accumulation buffer listed in `buffers` to the empty string.
 *
 * @param {Object} parser  object whose buffer properties are reset
 */
function clearBuffers (parser) {
  buffers.forEach(function (bufferName) {
    parser[bufferName] = "";
  });
}
|
135 |
|
// Instance methods of SAXParser.
//   end()    - finishes the document; has no return value
//   write()  - feeds a chunk to the state machine (see write below)
//   resume() - clears a pending error so writing may continue; chainable
//   close()  - shorthand for write(null), which ends the document
SAXParser.prototype = {
  end: function () {
    end(this);
  },
  write: write,
  resume: function () {
    this.error = null;
    return this;
  },
  close: function () {
    return this.write(null);
  }
};
|
142 |
|
// Base class for SAXStream. Use the platform's "stream" module when it can
// be required; otherwise fall back to an empty constructor so the rest of
// the module still loads.
var Stream;
try {
  Stream = require("stream").Stream;
} catch (ex) {
  Stream = function () {};
}
|
148 |
|
149 |
|
// Events that SAXStream forwards generically. "error" and "end" are left
// out because the SAXStream constructor installs dedicated handlers for them.
var streamWraps = sax.EVENTS.filter(function (eventName) {
  return !(eventName === "error" || eventName === "end");
});
|
153 |
|
/**
 * Factory for SAXStream instances.
 *
 * @param {boolean} strict  forwarded to the SAXStream constructor
 * @param {Object} [opt]    forwarded to the SAXStream constructor
 * @returns {SAXStream}
 */
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt);
  return stream;
}
|
157 |
|
/**
 * Stream wrapper around a SAXParser. May be called with or without `new`.
 *
 * Data written to the stream is fed to an internal parser (`this._parser`),
 * and parser events are re-emitted as stream events.
 *
 * @param {boolean} strict  forwarded to the SAXParser constructor
 * @param {Object} [opt]    forwarded to the SAXParser constructor
 */
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)

  // Capture `this` first. It was previously declared further down, so the
  // hoisted `me` was still undefined when passed to Stream.apply() and the
  // base constructor ran without a receiver.
  var me = this

  Stream.apply(me)

  this._parser = new SAXParser(strict, opt)
  this.writable = true
  this.readable = true

  this._parser.onend = function () {
    me.emit("end")
  }

  this._parser.onerror = function (er) {
    me.emit("error", er)

    // Clear the parser's stored error so a later write() does not rethrow
    // something the stream's listeners have already been told about.
    me._parser.error = null
  }

  // Expose an `on<event>` accessor for every forwarded event.
  // Reading returns the parser's handler; assigning a handler subscribes it
  // through on(); assigning a falsy value removes all listeners for that
  // event and stores the falsy value on the parser.
  streamWraps.forEach(function (ev) {
    Object.defineProperty(me, "on" + ev, {
      get: function () { return me._parser["on" + ev] },
      set: function (h) {
        if (!h) {
          me.removeAllListeners(ev)
          return me._parser["on"+ev] = h
        }
        me.on(ev, h)
      },
      enumerable: true,
      configurable: false
    })
  })
}
|
197 |
|
// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable property pointing back at SAXStream.
SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: { value: SAXStream }
});
|
200 |
|
/**
 * Feeds a chunk to the underlying parser (as a string) and then re-emits
 * the original chunk as a "data" event.
 *
 * @param {*} data  chunk to parse; converted with toString()
 * @returns {boolean} always true
 */
SAXStream.prototype.write = function (data) {
  var text = data.toString();
  this._parser.write(text);
  this.emit("data", data);
  return true;
};
|
206 |
|
/**
 * Optionally parses a final chunk, then ends the underlying parser.
 *
 * @param {*} [chunk]  last chunk; only parsed when it has a non-zero length
 * @returns {boolean} always true
 */
SAXStream.prototype.end = function (chunk) {
  var hasFinalChunk = chunk && chunk.length;
  if (hasFinalChunk) {
    this._parser.write(chunk.toString());
  }
  this._parser.end();
  return true;
};
|
212 |
|
/**
 * Subscribes a listener. The first time a forwarded parser event is
 * subscribed to, a bridge handler is installed on the parser that re-emits
 * the event (with all of its arguments) on the stream.
 *
 * @param {string} ev          event name
 * @param {Function} handler   listener
 * @returns whatever Stream.prototype.on returns
 */
SAXStream.prototype.on = function (ev, handler) {
  var stream = this;
  var slot = "on" + ev;
  var isForwarded = streamWraps.indexOf(ev) !== -1;

  if (!stream._parser[slot] && isForwarded) {
    stream._parser[slot] = function () {
      var emitArgs = Array.prototype.slice.call(arguments);
      emitArgs.unshift(ev);
      stream.emit.apply(stream, emitArgs);
    };
  }

  return Stream.prototype.on.call(stream, ev, handler);
};
|
226 |
|
227 |
|
228 |
|
229 |
|
// Character classes used by the state machine. Each one is first spelled
// out as a string of member characters (so classes can be composed by
// concatenation) and then converted into a lookup table with charClass().
var whitespace = "\r\n\t ";
var number = "0124356789";
var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

var nameStart = letter + "_:";
var nameBody = nameStart + number + "-.";
var quote = "'\"";
var entity = number + letter + "#";
var attribEnd = whitespace + ">";

// Literal markers matched while reading "<!" declarations.
var CDATA = "[CDATA[";
var DOCTYPE = "DOCTYPE";

// Namespace URIs that the `xml` and `xmlns` prefixes are bound to, and the
// root namespace scope built from them.
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE };

// Replace the strings by their lookup tables (composition is done above,
// so the order of these conversions does not matter).
attribEnd = charClass(attribEnd);
entity = charClass(entity);
quote = charClass(quote);
nameBody = charClass(nameBody);
nameStart = charClass(nameStart);
letter = charClass(letter);
number = charClass(number);
whitespace = charClass(whitespace);
|
256 |
|
/**
 * Builds a membership table from a string: every character of `str`
 * becomes a key whose value is true.
 *
 * @param {string} str  characters that belong to the class
 * @returns {Object} lookup table keyed by character
 */
function charClass (str) {
  var table = {};
  var chars = str.split("");
  for (var k = 0; k < chars.length; k++) {
    table[chars[k]] = true;
  }
  return table;
}
|
263 |
|
/**
 * Looks a character up in a class table produced by charClass().
 *
 * @param {Object} charclass  lookup table
 * @param {string} c          character to test
 * @returns the stored entry: true for members, undefined otherwise
 */
function is (charclass, c) {
  var entry = charclass[c];
  return entry;
}
|
267 |
|
/**
 * Negated lookup in a class table produced by charClass().
 *
 * @param {Object} charclass  lookup table
 * @param {string} c          character to test
 * @returns {boolean} true when `c` has no truthy entry in the table
 */
function not (charclass, c) {
  var entry = charclass[c];
  return !entry;
}
|
271 |
|
// Parser states. Each name is assigned the next integer, starting at 0, in
// the order listed here. `S` serves as the running counter while the table
// is built; it is pointed at the finished table further down.
var S = 0;
sax.STATE = [
  "BEGIN",
  "TEXT",
  "TEXT_ENTITY",
  "OPEN_WAKA",
  "SGML_DECL",
  "SGML_DECL_QUOTED",
  "DOCTYPE",
  "DOCTYPE_QUOTED",
  "DOCTYPE_DTD",
  "DOCTYPE_DTD_QUOTED",
  "COMMENT_STARTING",
  "COMMENT",
  "COMMENT_ENDING",
  "COMMENT_ENDED",
  "CDATA",
  "CDATA_ENDING",
  "CDATA_ENDING_2",
  "PROC_INST",
  "PROC_INST_BODY",
  "PROC_INST_QUOTED",
  "PROC_INST_ENDING",
  "OPEN_TAG",
  "OPEN_TAG_SLASH",
  "ATTRIB",
  "ATTRIB_NAME",
  "ATTRIB_NAME_SAW_WHITE",
  "ATTRIB_VALUE",
  "ATTRIB_VALUE_QUOTED",
  "ATTRIB_VALUE_UNQUOTED",
  "ATTRIB_VALUE_ENTITY_Q",
  "ATTRIB_VALUE_ENTITY_U",
  "CLOSE_TAG",
  "CLOSE_TAG_SAW_WHITE",
  "SCRIPT",
  "SCRIPT_ENDING"
].reduce(function (states, stateName) {
  states[stateName] = S++;
  return states;
}, {});
|
310 |
|
// Named entities recognised by default. Each parser gets its own table
// that inherits from this one (see the SAXParser constructor).
sax.ENTITIES = {
  "apos": "'",
  "quot": "\"",
  "amp": "&",
  "gt": ">",
  "lt": "<"
};
|
318 |
|
// Add the reverse mapping (number -> name) to sax.STATE so that a numeric
// state can be turned back into a readable name.
// The keys are snapshotted with Object.keys() first: adding properties to
// an object while a for...in loop enumerates it leaves it unspecified
// whether the new keys are visited, and visiting them would overwrite the
// name -> number entries.
Object.keys(sax.STATE).forEach(function (name) {
  sax.STATE[sax.STATE[name]] = name
})

// From here on `S` is shorthand for the state table.
S = sax.STATE
|
323 |
|
/**
 * Invokes the handler stored under `event` on the parser, if there is one.
 * The handler is called as a method of the parser with `data` as its only
 * argument.
 *
 * @param {Object} parser
 * @param {string} event  handler property name, e.g. "ontext"
 * @param {*} [data]      payload passed to the handler
 */
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data);
  }
}
|
327 |
|
/**
 * Emits a node event, flushing any pending text first so that handlers see
 * events in document order.
 *
 * @param {Object} parser
 * @param {string} nodeType  handler property name, e.g. "onopentag"
 * @param {*} [data]         payload passed to the handler
 */
function emitNode (parser, nodeType, data) {
  var hasPendingText = !!parser.textNode;
  if (hasPendingText) {
    closeText(parser);
  }
  emit(parser, nodeType, data);
}
|
332 |
|
/**
 * Flushes the pending text node: applies the text options, emits "ontext"
 * when anything is left, and empties the buffer.
 *
 * @param {Object} parser
 */
function closeText (parser) {
  var processed = textopts(parser.opt, parser.textNode);
  parser.textNode = processed;
  if (processed) {
    emit(parser, "ontext", processed);
  }
  parser.textNode = "";
}
|
338 |
|
/**
 * Applies the text-related options to a string.
 *
 * @param {Object} opt   options; `trim` strips surrounding whitespace and
 *                       `normalize` collapses whitespace runs to one space
 * @param {string} text  text to process
 * @returns {string} the processed text (trimming happens before normalizing)
 */
function textopts (opt, text) {
  var result = text;
  if (opt.trim) {
    result = result.trim();
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, " ");
  }
  return result;
}
|
344 |
|
/**
 * Records and reports a parse error.
 *
 * Pending text is flushed first. When position tracking is enabled the
 * current line, column and character are appended to the message. The
 * resulting Error is stored on `parser.error` and passed to "onerror".
 *
 * @param {Object} parser
 * @param {string} er  error message
 * @returns {Object} the parser
 */
function error (parser, er) {
  closeText(parser);

  var message = er;
  if (parser.trackPosition) {
    message += "\nLine: " + parser.line +
               "\nColumn: " + parser.column +
               "\nChar: " + parser.c;
  }

  var failure = new Error(message);
  parser.error = failure;
  emit(parser, "onerror", failure);
  return parser;
}
|
357 |
|
/**
 * Finishes the current document.
 *
 * Reports "Unexpected end" unless the parser is in the TEXT state, flushes
 * pending text, marks the parser closed, emits "onend", and finally re-runs
 * the SAXParser constructor on the same object to reset it for reuse
 * (which in turn emits "onready").
 *
 * @param {Object} parser
 * @returns {Object} the parser
 */
function end (parser) {
  var endedCleanly = parser.state === S.TEXT;
  if (!endedCleanly) {
    error(parser, "Unexpected end");
  }

  closeText(parser);
  parser.c = "";
  parser.closed = true;
  emit(parser, "onend");

  SAXParser.call(parser, parser.strict, parser.opt);
  return parser;
}
|
367 |
|
/**
 * Reports an error only when the parser is in strict mode; a no-op in
 * loose mode.
 *
 * @param {Object} parser
 * @param {string} message  error message
 */
function strictFail (parser, message) {
  if (!parser.strict) {
    return;
  }
  error(parser, message);
}
|
371 |
|
/**
 * Starts a new tag object from the accumulated tag name.
 *
 * In loose mode the name is case-folded first. With namespaces enabled the
 * tag initially shares the namespace scope of the enclosing tag (or of the
 * parser when there is none). The pending attribute list is emptied.
 *
 * @param {Object} parser
 */
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]();
  }

  var openTags = parser.tags;
  var enclosing = openTags[openTags.length - 1] || parser;
  var fresh = { name: parser.tagName, attributes: {} };
  parser.tag = fresh;

  if (parser.opt.xmlns) {
    fresh.ns = enclosing.ns;
  }
  parser.attribList.length = 0;
}
|
381 |
|
/**
 * Splits a qualified name into prefix and local part.
 *
 * - The bare name "xmlns" is reported as prefix "xmlns" with an empty
 *   local part.
 * - A name without a colon has an empty prefix.
 * - Otherwise the name is split on ":"; the first two pieces become prefix
 *   and local part.
 *
 * @param {string} name
 * @returns {{prefix: string, local: string}}
 */
function qname (name) {
  var hasColon = name.indexOf(":") >= 0;

  if (name === "xmlns") {
    return { prefix: "xmlns", local: "" };
  }
  if (!hasColon) {
    return { prefix: "", local: name };
  }

  var pieces = name.split(":");
  return { prefix: pieces[0], local: pieces[1] };
}
|
396 |
|
/**
 * Finishes the attribute currently held in parser.attribName/attribValue.
 *
 * Without namespaces the attribute is stored on the tag and emitted right
 * away. With namespaces it is queued in parser.attribList (openTag emits
 * it later, once all bindings of the tag are known); an `xmlns` /
 * `xmlns:prefix` attribute additionally binds the prefix in the tag's
 * namespace scope. Both buffers are emptied afterwards.
 *
 * @param {Object} parser
 */
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]();
  }

  if (!parser.opt.xmlns) {
    // Plain mode: record and announce the attribute immediately.
    parser.tag.attributes[parser.attribName] = parser.attribValue;
    emitNode(parser, "onattribute", {
      name: parser.attribName,
      value: parser.attribValue
    });
    parser.attribName = parser.attribValue = "";
    return;
  }

  var parts = qname(parser.attribName);
  var boundPrefix = parts.local;

  if (parts.prefix === "xmlns") {
    if (boundPrefix === "xml" && parser.attribValue !== XML_NAMESPACE) {
      strictFail(parser,
        "xml: prefix must be bound to " + XML_NAMESPACE + "\n" +
        "Actual: " + parser.attribValue);
    } else if (boundPrefix === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
      strictFail(parser,
        "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n" +
        "Actual: " + parser.attribValue);
    } else {
      var currentTag = parser.tag;
      var enclosing = parser.tags[parser.tags.length - 1] || parser;
      // Fork the scope the first time this tag declares a binding, so the
      // enclosing scope is left untouched.
      if (currentTag.ns === enclosing.ns) {
        currentTag.ns = Object.create(enclosing.ns);
      }
      currentTag.ns[boundPrefix] = parser.attribValue;
    }
  }

  parser.attribList.push([parser.attribName, parser.attribValue]);
  parser.attribName = parser.attribValue = "";
}
|
439 |
|
/**
 * Completes the tag held in parser.tag: resolves namespaces (when enabled),
 * emits queued attributes, pushes the tag on the open-tag stack and emits
 * "onopentag".
 *
 * @param {Object} parser
 * @param {boolean} [selfClosing]  true for `<tag/>`; the caller then closes
 *        the tag itself, so state, parser.tag and parser.tagName are left
 *        alone here.
 */
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag

    // Resolve the tag's own prefix against its namespace scope.
    var qn = qname(parser.tagName)
    tag.prefix = qn.prefix
    tag.local = qn.local
    tag.uri = tag.ns[qn.prefix] || ""

    // The uri used to fall back to the prefix *before* this check, which
    // made the check unreachable for prefixed tags. Report first, then
    // apply the fallback, exactly as is done for attributes below.
    if (tag.prefix && !tag.uri) {
      strictFail(parser, "Unbound namespace prefix: "
                       + JSON.stringify(parser.tagName))
      tag.uri = qn.prefix
    }

    // Announce the bindings this tag introduced. Its scope differs from the
    // parent's only if attrib() forked it.
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        emitNode( parser
                , "onopennamespace"
                , { prefix: p , uri: tag.ns[p] } )
      })
    }

    // Attributes were queued by attrib() because a binding may be declared
    // after the attribute that uses it; resolve and emit them now.
    for (var i = 0, l = parser.attribList.length; i < l; i ++) {
      var nv = parser.attribList[i]
      var name = nv[0]
        , value = nv[1]
        , qualName = qname(name)
        , prefix = qualName.prefix
        , local = qualName.local
        , uri = tag.ns[prefix] || ""
        , a = { name: name
              , value: value
              , prefix: prefix
              , local: local
              , uri: uri
              }

      if (prefix && prefix !== "xmlns" && !uri) {
        strictFail(parser, "Unbound namespace prefix: "
                         + JSON.stringify(prefix))
        a.uri = prefix
      }
      parser.tag.attributes[name] = a
      emitNode(parser, "onattribute", a)
    }
    parser.attribList.length = 0
  }

  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, "onopentag", parser.tag)
  if (!selfClosing) {
    // Unless script handling is disabled, the content of a <script> tag is
    // collected verbatim in the SCRIPT state.
    if (!parser.noscript && parser.tagName.toLowerCase() === "script") {
      parser.state = S.SCRIPT
    } else {
      parser.state = S.TEXT
    }
    parser.tag = null
    parser.tagName = ""
  }
  parser.attribName = parser.attribValue = ""
  parser.attribList.length = 0
}
|
511 |
|
/**
 * Handles a closing tag whose name is in parser.tagName.
 *
 * The open-tag stack is searched from the top for a tag with that name.
 * Every tag above the match is closed implicitly (each skipped tag is a
 * strict-mode error). If no open tag matches, or the name is empty, the
 * literal closing tag is appended to the text node instead.
 *
 * @param {Object} parser
 */
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, "Weird empty close tag.")
    parser.textNode += "</>"
    parser.state = S.TEXT
    return
  }

  // Walk down the stack to the first tag matching the requested name.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) tagName = tagName[parser.looseCase]()
  var closeTo = tagName
  while (t --) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      strictFail(parser, "Unexpected close tag")
    } else break
  }

  // Nothing matched: keep the stray closing tag as text.
  if (t < 0) {
    strictFail(parser, "Unmatched closing tag: "+parser.tagName)
    parser.textNode += "</" + parser.tagName + ">"
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName

  // Pop and announce every tag from the top of the stack down to the match.
  var s = parser.tags.length
  while (s --> t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, "onclosetag", parser.tagName)

    // A tag whose scope differs from its parent's declared bindings of its
    // own; announce that they go out of scope.
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, "onclosenamespace", { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ""
  parser.attribList.length = 0
  parser.state = S.TEXT
}
|
564 |
|
/**
 * Decodes the entity name collected in parser.entity (the text between
 * "&" and ";").
 *
 * Named entities are looked up, lower-cased, in parser.ENTITIES. Numeric
 * references ("#65", "#x41") are decoded to the corresponding character,
 * including code points above U+FFFF. Anything else is a strict-mode error
 * and is returned verbatim as "&name;".
 *
 * @param {Object} parser
 * @returns {string} the replacement text
 */
function parseEntity (parser) {
  var entity = parser.entity.toLowerCase()
    , num
    , numStr = ""

  // The entity table inherits from Object.prototype, so a name such as
  // "constructor" would otherwise resolve to a built-in member.
  var named = parser.ENTITIES[entity]
  if (named && named !== Object.prototype[entity]) return named

  if (entity.charAt(0) === "#") {
    if (entity.charAt(1) === "x") {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }

  // Round-trip check: the digits must re-serialise to exactly what was
  // written (ignoring leading zeros). isNaN() also rejects the empty entity
  // "&;" (num is undefined) and "&#nan;", whose strings match by accident.
  // Code points beyond U+10FFFF do not exist.
  entity = entity.replace(/^0+/, "")
  if (isNaN(num) || numStr.toLowerCase() !== entity || num > 0x10FFFF) {
    strictFail(parser, "Invalid character entity")
    return "&"+parser.entity + ";"
  }

  // String.fromCharCode() truncates to 16 bits, so characters outside the
  // Basic Multilingual Plane are built as a UTF-16 surrogate pair.
  if (num > 0xFFFF) {
    num -= 0x10000
    return String.fromCharCode(0xD800 + (num >> 10), 0xDC00 + (num & 0x3FF))
  }
  return String.fromCharCode(num)
}
|
588 |
|
/**
 * Feeds a chunk of the document to the parser (installed as
 * SAXParser.prototype.write, so `this` is the parser).
 *
 * The chunk is consumed one character at a time by a state machine keyed on
 * parser.state; events are emitted through the parser's on* handlers as
 * constructs are completed. State is kept on the parser, so a document may
 * be split across any number of write() calls.
 *
 * @param {string|Object|null} chunk  text to parse. `null` ends the
 *        document. Objects (e.g. Buffers) are converted with toString().
 * @returns {Object} the parser
 * @throws the stored parser.error, if a previous error has not been cleared
 */
function write (chunk) {
  var parser = this
  if (this.error) throw this.error
  if (parser.closed) return error(parser,
    "Cannot write after close. Assign an onready handler.")
  if (chunk === null) return end(parser)
  // Buffers and other objects have no charAt(); work on their string form.
  if (typeof chunk === "object") chunk = chunk.toString()
  var i = 0, c = ""
  while (parser.c = c = chunk.charAt(i++)) {
    if (parser.trackPosition) {
      parser.position ++
      if (c === "\n") {
        parser.line ++
        parser.column = 0
      } else parser.column ++
    }
    switch (parser.state) {

      case S.BEGIN:
        if (c === "<") parser.state = S.OPEN_WAKA
        else if (not(whitespace,c)) {
          // Leading non-whitespace is kept as text in loose mode.
          strictFail(parser, "Non-whitespace before first tag.")
          parser.textNode = c
          parser.state = S.TEXT
        }
      continue

      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // Fast path inside the root element: consume the whole run up to
          // the next "<" or "&" (or the end of the chunk) in one go.
          var starti = i-1
          while (c && c!=="<" && c!=="&") {
            c = chunk.charAt(i++)
            if (c && parser.trackPosition) {
              parser.position ++
              if (c === "\n") {
                parser.line ++
                parser.column = 0
              } else parser.column ++
            }
          }
          parser.textNode += chunk.substring(starti, i-1)
        }
        if (c === "<") parser.state = S.OPEN_WAKA
        else {
          if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
            strictFail(parser, "Text data outside of root node.")
          if (c === "&") parser.state = S.TEXT_ENTITY
          else parser.textNode += c
        }
      continue

      case S.SCRIPT:
        // Script content is collected verbatim until "</" is seen.
        if (c === "<") {
          parser.state = S.SCRIPT_ENDING
        } else parser.script += c
      continue

      case S.SCRIPT_ENDING:
        if (c === "/") {
          emitNode(parser, "onscript", parser.script)
          parser.state = S.CLOSE_TAG
          parser.script = ""
          parser.tagName = ""
        } else {
          parser.script += "<" + c
          parser.state = S.SCRIPT
        }
      continue

      case S.OPEN_WAKA:
        // Just after "<": decide which kind of construct follows.
        if (c === "!") {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ""
        } else if (is(whitespace, c)) {
          // whitespace after "<" is skipped
        } else if (is(nameStart,c)) {
          parser.startTagPosition = parser.position - 1
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === "/") {
          parser.startTagPosition = parser.position - 1
          parser.state = S.CLOSE_TAG
          parser.tagName = ""
        } else if (c === "?") {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ""
        } else {
          strictFail(parser, "Unencoded <")
          parser.textNode += "<" + c
          parser.state = S.TEXT
        }
      continue

      case S.SGML_DECL:
        if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
          emitNode(parser, "onopencdata")
          parser.state = S.CDATA
          parser.sgmlDecl = ""
          parser.cdata = ""
        } else if (parser.sgmlDecl+c === "--") {
          parser.state = S.COMMENT
          parser.comment = ""
          parser.sgmlDecl = ""
        } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) strictFail(parser,
            "Inappropriately located doctype declaration")
          parser.doctype = ""
          parser.sgmlDecl = ""
        } else if (c === ">") {
          emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
          parser.sgmlDecl = ""
          parser.state = S.TEXT
        } else if (is(quote, c)) {
          parser.state = S.SGML_DECL_QUOTED
          // Remember the opening quote; without it SGML_DECL_QUOTED can
          // never find its closing quote and swallows the rest of the input.
          parser.q = c
          parser.sgmlDecl += c
        } else parser.sgmlDecl += c
      continue

      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ""
        }
        parser.sgmlDecl += c
      continue

      case S.DOCTYPE:
        if (c === ">") {
          parser.state = S.TEXT
          emitNode(parser, "ondoctype", parser.doctype)
          // Marks "a doctype has been seen" for the check in SGML_DECL.
          parser.doctype = true
        } else {
          parser.doctype += c
          if (c === "[") parser.state = S.DOCTYPE_DTD
          else if (is(quote, c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
      continue

      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ""
          parser.state = S.DOCTYPE
        }
      continue

      case S.DOCTYPE_DTD:
        parser.doctype += c
        if (c === "]") parser.state = S.DOCTYPE
        else if (is(quote,c)) {
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        }
      continue

      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ""
        }
      continue

      case S.COMMENT:
        if (c === "-") parser.state = S.COMMENT_ENDING
        else parser.comment += c
      continue

      case S.COMMENT_ENDING:
        if (c === "-") {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) emitNode(parser, "oncomment", parser.comment)
          parser.comment = ""
        } else {
          parser.comment += "-" + c
          parser.state = S.COMMENT
        }
      continue

      case S.COMMENT_ENDED:
        if (c !== ">") {
          strictFail(parser, "Malformed comment")
          // Loose mode: "--" without ">" continues the comment.
          parser.comment += "--" + c
          parser.state = S.COMMENT
        } else parser.state = S.TEXT
      continue

      case S.CDATA:
        if (c === "]") parser.state = S.CDATA_ENDING
        else parser.cdata += c
      continue

      case S.CDATA_ENDING:
        if (c === "]") parser.state = S.CDATA_ENDING_2
        else {
          parser.cdata += "]" + c
          parser.state = S.CDATA
        }
      continue

      case S.CDATA_ENDING_2:
        if (c === ">") {
          if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
          emitNode(parser, "onclosecdata")
          parser.cdata = ""
          parser.state = S.TEXT
        } else if (c === "]") {
          // "]]]": the first bracket is content, the last two may still
          // turn out to be the terminator.
          parser.cdata += "]"
        } else {
          parser.cdata += "]]" + c
          parser.state = S.CDATA
        }
      continue

      case S.PROC_INST:
        if (c === "?") parser.state = S.PROC_INST_ENDING
        else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
        else parser.procInstName += c
      continue

      case S.PROC_INST_BODY:
        if (!parser.procInstBody && is(whitespace, c)) continue
        else if (c === "?") parser.state = S.PROC_INST_ENDING
        else if (is(quote, c)) {
          parser.state = S.PROC_INST_QUOTED
          parser.q = c
          parser.procInstBody += c
        } else parser.procInstBody += c
      continue

      case S.PROC_INST_ENDING:
        if (c === ">") {
          emitNode(parser, "onprocessinginstruction", {
            name : parser.procInstName,
            body : parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ""
          parser.state = S.TEXT
        } else {
          parser.procInstBody += "?" + c
          parser.state = S.PROC_INST_BODY
        }
      continue

      case S.PROC_INST_QUOTED:
        parser.procInstBody += c
        if (c === parser.q) {
          parser.state = S.PROC_INST_BODY
          parser.q = ""
        }
      continue

      case S.OPEN_TAG:
        if (is(nameBody, c)) parser.tagName += c
        else {
          newTag(parser)
          if (c === ">") openTag(parser)
          else if (c === "/") parser.state = S.OPEN_TAG_SLASH
          else {
            if (not(whitespace, c)) strictFail(
              parser, "Invalid character in tag name")
            parser.state = S.ATTRIB
          }
        }
      continue

      case S.OPEN_TAG_SLASH:
        if (c === ">") {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, "Forward-slash in opening tag not followed by >")
          parser.state = S.ATTRIB
        }
      continue

      case S.ATTRIB:
        // Between attributes: waiting for a name, "/" or ">".
        if (is(whitespace, c)) continue
        else if (c === ">") openTag(parser)
        else if (c === "/") parser.state = S.OPEN_TAG_SLASH
        else if (is(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ""
          parser.state = S.ATTRIB_NAME
        } else strictFail(parser, "Invalid attribute name")
      continue

      case S.ATTRIB_NAME:
        if (c === "=") parser.state = S.ATTRIB_VALUE
        else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
        else if (is(nameBody, c)) parser.attribName += c
        else strictFail(parser, "Invalid attribute name")
      continue

      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === "=") parser.state = S.ATTRIB_VALUE
        else if (is(whitespace, c)) continue
        else {
          // Loose mode: an attribute without a value gets the empty string.
          strictFail(parser, "Attribute without value")
          parser.tag.attributes[parser.attribName] = ""
          parser.attribValue = ""
          emitNode(parser, "onattribute",
                   { name : parser.attribName, value : "" })
          parser.attribName = ""
          if (c === ">") openTag(parser)
          else if (is(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, "Invalid attribute name")
            parser.state = S.ATTRIB
          }
        }
      continue

      case S.ATTRIB_VALUE:
        if (is(whitespace, c)) continue
        else if (is(quote, c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          strictFail(parser, "Unquoted attribute value")
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
      continue

      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
          else parser.attribValue += c
          continue
        }
        attrib(parser)
        parser.q = ""
        parser.state = S.ATTRIB
      continue

      case S.ATTRIB_VALUE_UNQUOTED:
        if (not(attribEnd,c)) {
          if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
          else parser.attribValue += c
          continue
        }
        attrib(parser)
        if (c === ">") openTag(parser)
        else parser.state = S.ATTRIB
      continue

      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (is(whitespace, c)) continue
          else if (not(nameStart, c)) strictFail(parser,
            "Invalid tagname in closing tag.")
          else parser.tagName = c
        }
        else if (c === ">") closeTag(parser)
        else if (is(nameBody, c)) parser.tagName += c
        else {
          if (not(whitespace, c)) strictFail(parser,
            "Invalid tagname in closing tag")
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
      continue

      case S.CLOSE_TAG_SAW_WHITE:
        if (is(whitespace, c)) continue
        if (c === ">") closeTag(parser)
        else strictFail(parser, "Invalid characters in closing tag")
      continue

      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // The three entity states differ only in the buffer the decoded
        // text goes to and the state to resume afterwards.
        switch(parser.state) {
          case S.TEXT_ENTITY:
            var returnState = S.TEXT, buffer = "textNode"
          break

          case S.ATTRIB_VALUE_ENTITY_Q:
            var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
          break

          case S.ATTRIB_VALUE_ENTITY_U:
            var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
          break
        }
        if (c === ";") {
          parser[buffer] += parseEntity(parser)
          parser.entity = ""
          parser.state = returnState
        }
        else if (is(entity, c)) parser.entity += c
        else {
          // Not an entity after all: keep the raw text.
          strictFail(parser, "Invalid character entity")
          parser[buffer] += "&" + parser.entity + c
          parser.entity = ""
          parser.state = returnState
        }
      continue

      default:
        throw new Error("Unknown state: " + parser.state)
    }
  }

  // Buffer sizes are only inspected once the position passes the
  // precomputed threshold.
  if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
  return parser
}
|
1013 |
|
1014 | })(typeof exports === "undefined" ? sax = {} : exports)
|