// UNPKG

// 30.1 kB, JavaScript (raw view)
1// wrapper for non-node envs
2;(function (sax) {
3
// Public surface of the module: a factory that builds a parser, plus the
// constructors and the stream factory themselves.
sax.parser = function (strict, opt) {
  var instance = new SAXParser(strict, opt);
  return instance;
};
sax.SAXParser = SAXParser;
sax.SAXStream = SAXStream;
sax.createStream = createStream;
8
9// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
10// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
11// since that's the earliest that a buffer overrun could occur. This way, checks are
12// as rare as required, but as often as necessary to ensure never crossing this bound.
13// Furthermore, buffers are only tested at most once per write(), so passing a very
14// large string into write() might have undesirable effects, but this is manageable by
15// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
16// edge case, result in creating at most one complete copy of the string passed in.
17// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024;

// Names of the parser properties that accumulate characters.
// clearBuffers() resets each of them to "" and checkBufferLength()
// measures each of them against sax.MAX_BUFFER_LENGTH.
var buffers = [
  "comment",
  "sgmlDecl",
  "textNode",
  "tagName",
  "doctype",
  "procInstName",
  "procInstBody",
  "entity",
  "attribName",
  "attribValue",
  "cdata",
  "script"
];
25
// Event names, exposed for discoverability. Handlers are looked up on the
// parser under "on" + name.
sax.EVENTS = [
  "text",
  "processinginstruction",
  "sgmldeclaration",
  "doctype",
  "comment",
  "attribute",
  "opentag",
  "closetag",
  "opencdata",
  "cdata",
  "closecdata",
  "error",
  "end",
  "ready",
  "script",
  "opennamespace",
  "closenamespace"
];
45
// Parser constructor. May be called with or without `new`.
//   strict - truthy to report recoverable problems as errors
//   opt    - optional settings object; it is stored on the parser and its
//            `lowercase` property is normalised from `lowercasetags`
// Also used by end() to reset an existing parser in place.
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt);
  }

  var self = this;
  clearBuffers(self);

  self.c = "";
  self.q = "";
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;

  self.opt = opt || {};
  self.opt.lowercase = self.opt.lowercase || self.opt.lowercasetags;
  // name of the String method used to fold names in non-strict mode
  if (self.opt.lowercase) {
    self.looseCase = "toLowerCase";
  } else {
    self.looseCase = "toUpperCase";
  }

  self.tags = [];
  self.sawRoot = false;
  self.closedRoot = false;
  self.closed = false;
  self.error = null;
  self.tag = null;
  self.strict = !!strict;
  self.noscript = !!(strict || self.opt.noscript);
  self.state = S.BEGIN;
  self.ENTITIES = Object.create(sax.ENTITIES);
  self.attribList = [];

  // Namespace bindings form a prototype chain: the current tag's bindings
  // inherit from its parent's, and the outermost ones inherit from rootNS.
  if (self.opt.xmlns) {
    self.ns = Object.create(rootNS);
  }

  // Position bookkeeping is on unless opt.position is exactly false.
  self.trackPosition = self.opt.position !== false;
  if (self.trackPosition) {
    self.column = 0;
    self.line = 0;
    self.position = 0;
  }

  emit(self, "onready");
}
77
// Minimal fallbacks for engines without the ES5 Object helpers.
// Each one is only installed when the native function is missing.
if (!Object.create) {
  Object.create = function (proto) {
    function Surrogate () { this.__proto__ = proto; }
    Surrogate.prototype = proto;
    return new Surrogate();
  };
}

if (!Object.getPrototypeOf) {
  Object.getPrototypeOf = function (obj) {
    return obj.__proto__;
  };
}

if (!Object.keys) {
  Object.keys = function (obj) {
    var names = [];
    for (var key in obj) {
      if (obj.hasOwnProperty(key)) {
        names.push(key);
      }
    }
    return names;
  };
}
93
// Measure every buffer listed in `buffers` against the size limit.
// Oversized textNode/cdata/script buffers are flushed to their handlers;
// any other oversized buffer is reported through error().
// Afterwards the next check is scheduled at the earliest position where a
// buffer could overrun again.
function checkBufferLength (parser) {
  var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10);
  var longest = 0;

  buffers.forEach(function (name) {
    var size = parser[name].length;

    if (size > limit) {
      // Text, cdata and script content can legitimately grow large, so
      // hand over what we have instead of failing.
      if (name === "textNode") {
        closeText(parser);
      } else if (name === "cdata") {
        emitNode(parser, "oncdata", parser.cdata);
        parser.cdata = "";
      } else if (name === "script") {
        emitNode(parser, "onscript", parser.script);
        parser.script = "";
      } else {
        error(parser, "Max buffer length exceeded: " + name);
      }
    }

    // the size measured before any flush is what counts here
    if (size > longest) {
      longest = size;
    }
  });

  parser.bufferCheckPosition =
    (sax.MAX_BUFFER_LENGTH - longest) + parser.position;
}
129
// Reset every buffer named in `buffers` to the empty string.
function clearBuffers (parser) {
  buffers.forEach(function (name) {
    parser[name] = "";
  });
}
135
// Instance methods of the parser.
//   end()    - finish the document; returns nothing
//   write()  - feed a chunk (see write below)
//   resume() - clear a recorded error so writing can continue; returns this
//   close()  - shorthand for write(null)
SAXParser.prototype = {
  end: function () {
    end(this);
  },
  write: write,
  resume: function () {
    this.error = null;
    return this;
  },
  close: function () {
    return this.write(null);
  }
};
142
// Use the platform's Stream constructor when require("stream") works;
// otherwise fall back to an empty constructor.
var Stream;
try {
  Stream = require("stream").Stream;
} catch (ex) {
  Stream = function () {};
}
148
149
// Every parser event except "error" and "end", which SAXStream wires up
// itself in its constructor.
var streamWraps = sax.EVENTS.filter(function (name) {
  return !(name === "error" || name === "end");
});
153
// Factory wrapper around the SAXStream constructor.
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt);
  return stream;
}
157
// Stream wrapper around a SAXParser. May be called with or without `new`.
//   strict, opt - forwarded unchanged to the SAXParser constructor
// The instance is marked both writable and readable. Parser "end" and
// "error" callbacks are re-emitted as stream events, and for every name in
// streamWraps an "on<name>" accessor is defined on the instance.
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)

  // Capture the instance before anything uses it: the base constructor
  // has to run against this object, not against an unassigned variable.
  var me = this

  Stream.apply(me)

  me._parser = new SAXParser(strict, opt)
  me.writable = true
  me.readable = true

  me._parser.onend = function () {
    me.emit("end")
  }

  me._parser.onerror = function (er) {
    me.emit("error", er)

    // if didn't throw, then means error was handled.
    // go ahead and clear error, so we can write again.
    me._parser.error = null
  }

  streamWraps.forEach(function (ev) {
    Object.defineProperty(me, "on" + ev, {
      // reading returns whatever callback the parser currently holds
      get: function () { return me._parser["on" + ev] },
      // a falsy value removes all listeners for the event and stores the
      // value on the parser; anything else is registered through on()
      set: function (h) {
        if (!h) {
          me.removeAllListeners(ev)
          return me._parser["on"+ev] = h
        }
        me.on(ev, h)
      },
      enumerable: true,
      configurable: false
    })
  })
}
197
// SAXStream instances inherit from Stream.prototype; `constructor` is
// restored through a property descriptor that supplies only a value.
SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: {
    value: SAXStream
  }
});
200
// Feed a chunk to the underlying parser as a string, then re-emit the
// original chunk as a "data" event. Always returns true.
SAXStream.prototype.write = function (data) {
  var text = data.toString();
  this._parser.write(text);
  this.emit("data", data);
  return true;
};
206
// Write an optional final chunk (only when it has a non-zero length) and
// then end the underlying parser. Always returns true.
SAXStream.prototype.end = function (chunk) {
  var hasData = chunk && chunk.length;
  if (hasData) {
    this._parser.write(chunk.toString());
  }
  this._parser.end();
  return true;
};
212
// Register a listener. The first time a listener is added for one of the
// wrapped parser events (and the parser has no callback for it yet), a
// forwarding callback is installed on the parser that re-emits the event
// on the stream with the same arguments.
SAXStream.prototype.on = function (ev, handler) {
  var stream = this;
  var slot = "on" + ev;

  if (!stream._parser[slot] && streamWraps.indexOf(ev) !== -1) {
    stream._parser[slot] = function () {
      var args = Array.prototype.slice.call(arguments);
      args.unshift(ev);
      stream.emit.apply(stream, args);
    };
  }

  return Stream.prototype.on.call(stream, ev, handler);
};
226
227
228
229// character classes and tokens
var whitespace = "\r\n\t ";
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
var number = "0124356789";
var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
// (Letter | "_" | ":")
var nameStart = letter + "_:";
var nameBody = nameStart + number + "-.";
var quote = "'\"";
var entity = number + letter + "#";
var attribEnd = whitespace + ">";
// literal keywords matched inside "<!" declarations
var CDATA = "[CDATA[";
var DOCTYPE = "DOCTYPE";
// namespace URIs for the two predefined prefixes
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE };
246
247// turn all the string character sets into character class objects.
// single characters
whitespace = charClass(whitespace);
number = charClass(number);
letter = charClass(letter);
quote = charClass(quote);

// name and entity characters
nameStart = charClass(nameStart);
nameBody = charClass(nameBody);
entity = charClass(entity);

// characters that terminate an unquoted attribute value
attribEnd = charClass(attribEnd);
256
// Build a lookup table from a string: every character of `str` becomes a
// key whose value is true.
function charClass (str) {
  var table = {};
  for (var i = 0; i < str.length; i++) {
    table[str.charAt(i)] = true;
  }
  return table;
}
263
// Look up character `c` in a table built by charClass. Returns the stored
// value (true) for members and undefined for anything else.
function is (charclass, c) {
  var member = charclass[c];
  return member;
}
267
// Negated lookup: true when `c` is absent from the table, false when present.
function not (charclass, c) {
  var member = charclass[c];
  return !member;
}
271
// Parser states. S is a running counter here, so each name receives the
// next integer starting from 0.
var S = 0;
sax.STATE = {
  BEGIN: S++,
  TEXT: S++,                  // general stuff
  TEXT_ENTITY: S++,           // &amp and such.
  OPEN_WAKA: S++,             // <
  SGML_DECL: S++,             // <!BLARG
  SGML_DECL_QUOTED: S++,      // <!BLARG foo "bar
  DOCTYPE: S++,               // <!DOCTYPE
  DOCTYPE_QUOTED: S++,        // <!DOCTYPE "//blah
  DOCTYPE_DTD: S++,           // <!DOCTYPE "//blah" [ ...
  DOCTYPE_DTD_QUOTED: S++,    // <!DOCTYPE "//blah" [ "foo
  COMMENT_STARTING: S++,      // <!-
  COMMENT: S++,               // <!--
  COMMENT_ENDING: S++,        // <!-- blah -
  COMMENT_ENDED: S++,         // <!-- blah --
  CDATA: S++,                 // <![CDATA[ something
  CDATA_ENDING: S++,          // ]
  CDATA_ENDING_2: S++,        // ]]
  PROC_INST: S++,             // <?hi
  PROC_INST_BODY: S++,        // <?hi there
  PROC_INST_QUOTED: S++,      // <?hi "there
  PROC_INST_ENDING: S++,      // <?hi "there" ?
  OPEN_TAG: S++,              // <strong
  OPEN_TAG_SLASH: S++,        // <strong /
  ATTRIB: S++,                // <a
  ATTRIB_NAME: S++,           // <a foo
  ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
  ATTRIB_VALUE: S++,          // <a foo=
  ATTRIB_VALUE_QUOTED: S++,   // <a foo="bar
  ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
  ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
  ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot;
  CLOSE_TAG: S++,             // </a
  CLOSE_TAG_SAW_WHITE: S++,   // </a >
  SCRIPT: S++,                // <script> ...
  SCRIPT_ENDING: S++          // <script> ... <
};
310
// The five predefined named entities. Each parser gets its own ENTITIES
// object that inherits from this one.
sax.ENTITIES = {
  "apos": "'",
  "quot": "\"",
  "amp": "&",
  "gt": ">",
  "lt": "<"
};
318
// Add the reverse mapping (number -> name) to the state table.
for (var S in sax.STATE) {
  sax.STATE[sax.STATE[S]] = S;
}

// shorthand
S = sax.STATE;
323
// Call parser[event](data) when a handler is set under that name;
// otherwise do nothing.
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data);
  }
}
327
// Emit a node event, first flushing any pending text so that the text
// handler runs before the node's handler.
function emitNode (parser, nodeType, data) {
  if (parser.textNode) {
    closeText(parser);
  }
  // same dispatch as emit(): call the handler only when one is set
  if (parser[nodeType]) {
    parser[nodeType](data);
  }
}
332
// Flush the pending text node: apply the trim/normalize options, emit
// "ontext" when anything is left, and clear the buffer.
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode);
  parser.textNode = text;
  if (parser.textNode) {
    emit(parser, "ontext", parser.textNode);
  }
  parser.textNode = "";
}
338
// Apply the text options to a string and return the result:
//   opt.trim      - strip leading and trailing whitespace
//   opt.normalize - collapse each run of whitespace into a single space
function textopts (opt, text) {
  var result = text;
  if (opt.trim) {
    result = result.trim();
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, " ");
  }
  return result;
}
344
// Record and report an error.
// Pending text is flushed first. When position tracking is on, the current
// line, column and character are appended to the message. The resulting
// Error is stored on parser.error and passed to "onerror".
// Returns the parser.
function error (parser, er) {
  closeText(parser);

  var message = er;
  if (parser.trackPosition) {
    message += "\nLine: " + parser.line +
      "\nColumn: " + parser.column +
      "\nChar: " + parser.c;
  }

  var failure = new Error(message);
  parser.error = failure;
  emit(parser, "onerror", failure);
  return parser;
}
357
// Finish the document. Reports "Unexpected end" unless the parser is in
// the TEXT state, flushes pending text, marks the parser closed, emits
// "onend", and then re-runs the constructor on the same object so it is
// reset with the same strict flag and options. Returns the parser.
function end (parser) {
  var inText = parser.state === S.TEXT;
  if (!inText) {
    error(parser, "Unexpected end");
  }

  closeText(parser);
  parser.c = "";
  parser.closed = true;
  emit(parser, "onend");

  SAXParser.call(parser, parser.strict, parser.opt);
  return parser;
}
367
// Report `message` through error(), but only when the parser is strict.
function strictFail (parser, message) {
  if (!parser.strict) {
    return;
  }
  error(parser, message);
}
371
// Begin a new tag from the name collected in parser.tagName.
// In non-strict mode the name is case-folded first. The new tag object
// becomes parser.tag, and the list of deferred attributes is emptied.
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]();
  }

  var stack = parser.tags;
  var enclosing = stack[stack.length - 1] || parser;
  var tag = { name: parser.tagName, attributes: {} };
  parser.tag = tag;

  // Start with the enclosing scope's bindings; this is replaced if the
  // tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
  if (parser.opt.xmlns) {
    tag.ns = enclosing.ns;
  }

  parser.attribList.length = 0;
}
381
// Split a qualified name into { prefix, local }.
// A name without ":" has an empty prefix. For a name with ":" the prefix
// is the text before the first colon and local is the segment between the
// first and second colon (or the rest when there is only one colon).
// The bare name "xmlns" is reported as prefix "xmlns" with an empty local.
function qname (name) {
  // <x "xmlns"="http://foo">
  if (name === "xmlns") {
    return { prefix: "xmlns", local: "" };
  }

  if (name.indexOf(":") < 0) {
    return { prefix: "", local: name };
  }

  var parts = name.split(":");
  return { prefix: parts[0], local: parts[1] };
}
396
// Finish the attribute held in parser.attribName / parser.attribValue.
// Without opt.xmlns the attribute is stored on the tag and emitted at once.
// With opt.xmlns, namespace declarations are bound on the tag and every
// attribute is queued in parser.attribList so that its event can be
// emitted later, once all bindings of the tag are known.
// Both buffers are cleared afterwards.
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]();
  }

  if (!parser.opt.xmlns) {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue;
    emitNode(parser, "onattribute", {
      name: parser.attribName,
      value: parser.attribValue
    });
  } else {
    var parts = qname(parser.attribName);
    var boundPrefix = parts.local;

    if (parts.prefix === "xmlns") {
      // namespace binding attribute; push the binding into scope
      if (boundPrefix === "xml" && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          "xml: prefix must be bound to " + XML_NAMESPACE + "\n" +
          "Actual: " + parser.attribValue);
      } else if (boundPrefix === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n" +
          "Actual: " + parser.attribValue);
      } else {
        var current = parser.tag;
        var enclosing = parser.tags[parser.tags.length - 1] || parser;
        // give the tag its own scope the first time it declares a binding
        if (current.ns === enclosing.ns) {
          current.ns = Object.create(enclosing.ns);
        }
        current.ns[boundPrefix] = parser.attribValue;
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect; preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue]);
  }

  parser.attribValue = "";
  parser.attribName = "";
}
439
// Finish an opening tag: resolve namespace information (xmlns mode only),
// emit the deferred attribute events, push the tag on the stack and emit
// "onopentag".
//   parser      - the parser whose current tag (parser.tag) is being opened
//   selfClosing - truthy for "<a/>"; the state and current tag are then
//                 left as they are (the caller runs closeTag right after)
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    // emit namespace binding events
    var tag = parser.tag

    // add namespace info to tag
    var qn = qname(parser.tagName)
    tag.prefix = qn.prefix
    tag.local = qn.local
    // Resolve to "" first so an unbound prefix can actually be detected;
    // falling back to the prefix up front would make the check below
    // impossible to trigger.
    tag.uri = tag.ns[qn.prefix] || ""

    if (tag.prefix && !tag.uri) {
      strictFail(parser, "Unbound namespace prefix: "
                       + JSON.stringify(parser.tagName))
      // keep the previous fallback value for an unbound prefix
      tag.uri = qn.prefix
    }

    // the tag introduced bindings of its own: announce each of them
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        emitNode( parser
                , "onopennamespace"
                , { prefix: p , uri: tag.ns[p] } )
      })
    }

    // handle deferred onattribute events
    for (var i = 0, l = parser.attribList.length; i < l; i ++) {
      var nv = parser.attribList[i]
      var name = nv[0]
        , value = nv[1]
        , qualName = qname(name)
        , prefix = qualName.prefix
        , local = qualName.local
        , uri = tag.ns[prefix] || ""
        , a = { name: name
              , value: value
              , prefix: prefix
              , local: local
              , uri: uri
              }

      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (prefix && prefix != "xmlns" && !uri) {
        strictFail(parser, "Unbound namespace prefix: "
                         + JSON.stringify(prefix))
        a.uri = prefix
      }
      parser.tag.attributes[name] = a
      emitNode(parser, "onattribute", a)
    }
    parser.attribList.length = 0
  }

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, "onopentag", parser.tag)
  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    if (!parser.noscript && parser.tagName.toLowerCase() === "script") {
      parser.state = S.SCRIPT
    } else {
      parser.state = S.TEXT
    }
    parser.tag = null
    parser.tagName = ""
  }
  parser.attribName = parser.attribValue = ""
  parser.attribList.length = 0
}
511
// Handle a closing tag whose name is in parser.tagName.
// The open-tag stack is searched from the top for a matching name; every
// tag above the match is closed as well. When nothing matches (or the name
// is empty) the closing tag is put back into the text buffer as literal
// text. Strict mode reports each mismatch through strictFail.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, "Weird empty close tag.")
    parser.textNode += "</>"
    parser.state = S.TEXT
    return
  }
  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) tagName = tagName[parser.looseCase]()
  var closeTo = tagName
  while (t --) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, "Unexpected close tag")
    } else break
  }

  // didn't find it.  we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, "Unmatched closing tag: "+parser.tagName)
    parser.textNode += "</" + parser.tagName + ">"
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName
  // pop and close every tag from the top of the stack down to the match
  var s = parser.tags.length
  while (s --> t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, "onclosetag", parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, "onclosenamespace", { prefix: p, uri: n })
      })
    }
  }
  // index 0 is the root element
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ""
  parser.attribList.length = 0
  parser.state = S.TEXT
}
564
// Resolve the entity name collected in parser.entity (the text between
// "&" and ";") to its replacement text.
// Named entities are looked up, lower-cased, in parser.ENTITIES. Numeric
// references ("#65", "#x41") are converted to the character they denote.
// Anything else is reported through strictFail and returned as the
// literal "&name;" text.
function parseEntity (parser) {
  var entity = parser.entity.toLowerCase()
    , num
    , numStr = ""

  // Only accept string values: ENTITIES inherits from ordinary objects, so
  // a name like "constructor" would otherwise resolve to a function.
  var named = parser.ENTITIES[entity]
  if (named && typeof named === "string") return named

  if (entity.charAt(0) === "#") {
    if (entity.charAt(1) === "x") {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // leading zeros are allowed; compare against the canonical digits
  entity = entity.replace(/^0+/, "")
  // isNaN covers both "no number parsed" and "not a numeric reference at
  // all" (num undefined), e.g. the empty entity "&;". Values above
  // 0x10FFFF are not Unicode code points.
  if (isNaN(num) || numStr.toLowerCase() !== entity || num > 0x10FFFF) {
    strictFail(parser, "Invalid character entity")
    return "&"+parser.entity + ";"
  }
  // code points beyond the BMP need a surrogate pair
  if (num > 0xFFFF) {
    num -= 0x10000
    return String.fromCharCode(0xD800 + (num >> 10), 0xDC00 + (num & 0x3FF))
  }
  return String.fromCharCode(num)
}
588
// Feed a chunk of text to the parser (installed as SAXParser.prototype.write).
//   chunk - a string to parse, or null to end the document
// Returns the parser. Throws the stored error if a previous error has not
// been cleared. Writing after the parser is closed reports an error
// through error() instead.
// The chunk is consumed one character at a time by a state machine keyed
// on parser.state; each case either accumulates the character in one of
// the parser's buffers or switches state and emits events.
function write (chunk) {
  var parser = this
  if (this.error) throw this.error
  if (parser.closed) return error(parser,
    "Cannot write after close. Assign an onready handler.")
  if (chunk === null) return end(parser)
  var i = 0, c = ""
  // charAt() past the end yields "", which terminates the loop
  while (parser.c = c = chunk.charAt(i++)) {
    if (parser.trackPosition) {
      parser.position ++
      if (c === "\n") {
        parser.line ++
        parser.column = 0
      } else parser.column ++
    }
    switch (parser.state) {

      case S.BEGIN:
        if (c === "<") parser.state = S.OPEN_WAKA
        else if (not(whitespace,c)) {
          // have to process this as a text node.
          // weird, but happens.
          strictFail(parser, "Non-whitespace before first tag.")
          parser.textNode = c
          parser.state = S.TEXT
        }
      continue

      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // fast path inside the root element: scan ahead to the next
          // "<" or "&" and append the whole run at once
          var starti = i-1
          while (c && c!=="<" && c!=="&") {
            c = chunk.charAt(i++)
            if (c && parser.trackPosition) {
              parser.position ++
              if (c === "\n") {
                parser.line ++
                parser.column = 0
              } else parser.column ++
            }
          }
          parser.textNode += chunk.substring(starti, i-1)
        }
        if (c === "<") parser.state = S.OPEN_WAKA
        else {
          if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
            strictFail(parser, "Text data outside of root node.")
          if (c === "&") parser.state = S.TEXT_ENTITY
          else parser.textNode += c
        }
      continue

      case S.SCRIPT:
        // only non-strict
        if (c === "<") {
          parser.state = S.SCRIPT_ENDING
        } else parser.script += c
      continue

      case S.SCRIPT_ENDING:
        if (c === "/") {
          emitNode(parser, "onscript", parser.script)
          parser.state = S.CLOSE_TAG
          parser.script = ""
          parser.tagName = ""
        } else {
          parser.script += "<" + c
          parser.state = S.SCRIPT
        }
      continue

      case S.OPEN_WAKA:
        // either a /, ?, !, or text is coming next.
        if (c === "!") {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ""
        } else if (is(whitespace, c)) {
          // wait for it...
        } else if (is(nameStart,c)) {
          parser.startTagPosition = parser.position - 1
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === "/") {
          parser.startTagPosition = parser.position - 1
          parser.state = S.CLOSE_TAG
          parser.tagName = ""
        } else if (c === "?") {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ""
        } else {
          strictFail(parser, "Unencoded <")
          parser.textNode += "<" + c
          parser.state = S.TEXT
        }
      continue

      case S.SGML_DECL:
        if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
          emitNode(parser, "onopencdata")
          parser.state = S.CDATA
          parser.sgmlDecl = ""
          parser.cdata = ""
        } else if (parser.sgmlDecl+c === "--") {
          parser.state = S.COMMENT
          parser.comment = ""
          parser.sgmlDecl = ""
        } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) strictFail(parser,
            "Inappropriately located doctype declaration")
          parser.doctype = ""
          parser.sgmlDecl = ""
        } else if (c === ">") {
          emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
          parser.sgmlDecl = ""
          parser.state = S.TEXT
        } else if (is(quote, c)) {
          parser.state = S.SGML_DECL_QUOTED
          // remember which quote opened the string; SGML_DECL_QUOTED
          // compares against it to find the end
          parser.q = c
          parser.sgmlDecl += c
        } else parser.sgmlDecl += c
      continue

      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ""
        }
        parser.sgmlDecl += c
      continue

      case S.DOCTYPE:
        if (c === ">") {
          parser.state = S.TEXT
          emitNode(parser, "ondoctype", parser.doctype)
          parser.doctype = true // just remember that we saw it.
        } else {
          parser.doctype += c
          if (c === "[") parser.state = S.DOCTYPE_DTD
          else if (is(quote, c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
      continue

      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ""
          parser.state = S.DOCTYPE
        }
      continue

      case S.DOCTYPE_DTD:
        parser.doctype += c
        if (c === "]") parser.state = S.DOCTYPE
        else if (is(quote,c)) {
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        }
      continue

      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ""
        }
      continue

      case S.COMMENT:
        if (c === "-") parser.state = S.COMMENT_ENDING
        else parser.comment += c
      continue

      case S.COMMENT_ENDING:
        if (c === "-") {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) emitNode(parser, "oncomment", parser.comment)
          parser.comment = ""
        } else {
          parser.comment += "-" + c
          parser.state = S.COMMENT
        }
      continue

      case S.COMMENT_ENDED:
        if (c !== ">") {
          strictFail(parser, "Malformed comment")
          // allow <!-- blah -- bloo --> in non-strict mode,
          // which is a comment of " blah -- bloo "
          parser.comment += "--" + c
          parser.state = S.COMMENT
        } else parser.state = S.TEXT
      continue

      case S.CDATA:
        if (c === "]") parser.state = S.CDATA_ENDING
        else parser.cdata += c
      continue

      case S.CDATA_ENDING:
        if (c === "]") parser.state = S.CDATA_ENDING_2
        else {
          parser.cdata += "]" + c
          parser.state = S.CDATA
        }
      continue

      case S.CDATA_ENDING_2:
        if (c === ">") {
          if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
          emitNode(parser, "onclosecdata")
          parser.cdata = ""
          parser.state = S.TEXT
        } else if (c === "]") {
          parser.cdata += "]"
        } else {
          parser.cdata += "]]" + c
          parser.state = S.CDATA
        }
      continue

      case S.PROC_INST:
        if (c === "?") parser.state = S.PROC_INST_ENDING
        else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
        else parser.procInstName += c
      continue

      case S.PROC_INST_BODY:
        if (!parser.procInstBody && is(whitespace, c)) continue
        else if (c === "?") parser.state = S.PROC_INST_ENDING
        else if (is(quote, c)) {
          parser.state = S.PROC_INST_QUOTED
          parser.q = c
          parser.procInstBody += c
        } else parser.procInstBody += c
      continue

      case S.PROC_INST_ENDING:
        if (c === ">") {
          emitNode(parser, "onprocessinginstruction", {
            name : parser.procInstName,
            body : parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ""
          parser.state = S.TEXT
        } else {
          parser.procInstBody += "?" + c
          parser.state = S.PROC_INST_BODY
        }
      continue

      case S.PROC_INST_QUOTED:
        parser.procInstBody += c
        if (c === parser.q) {
          parser.state = S.PROC_INST_BODY
          parser.q = ""
        }
      continue

      case S.OPEN_TAG:
        if (is(nameBody, c)) parser.tagName += c
        else {
          newTag(parser)
          if (c === ">") openTag(parser)
          else if (c === "/") parser.state = S.OPEN_TAG_SLASH
          else {
            if (not(whitespace, c)) strictFail(
              parser, "Invalid character in tag name")
            parser.state = S.ATTRIB
          }
        }
      continue

      case S.OPEN_TAG_SLASH:
        if (c === ">") {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, "Forward-slash in opening tag not followed by >")
          parser.state = S.ATTRIB
        }
      continue

      case S.ATTRIB:
        // haven't read the attribute name yet.
        if (is(whitespace, c)) continue
        else if (c === ">") openTag(parser)
        else if (c === "/") parser.state = S.OPEN_TAG_SLASH
        else if (is(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ""
          parser.state = S.ATTRIB_NAME
        } else strictFail(parser, "Invalid attribute name")
      continue

      case S.ATTRIB_NAME:
        if (c === "=") parser.state = S.ATTRIB_VALUE
        else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
        else if (is(nameBody, c)) parser.attribName += c
        else strictFail(parser, "Invalid attribute name")
      continue

      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === "=") parser.state = S.ATTRIB_VALUE
        else if (is(whitespace, c)) continue
        else {
          strictFail(parser, "Attribute without value")
          parser.tag.attributes[parser.attribName] = ""
          parser.attribValue = ""
          emitNode(parser, "onattribute",
                   { name : parser.attribName, value : "" })
          parser.attribName = ""
          if (c === ">") openTag(parser)
          else if (is(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, "Invalid attribute name")
            parser.state = S.ATTRIB
          }
        }
      continue

      case S.ATTRIB_VALUE:
        if (is(whitespace, c)) continue
        else if (is(quote, c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          strictFail(parser, "Unquoted attribute value")
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
      continue

      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
          else parser.attribValue += c
          continue
        }
        attrib(parser)
        parser.q = ""
        parser.state = S.ATTRIB
      continue

      case S.ATTRIB_VALUE_UNQUOTED:
        if (not(attribEnd,c)) {
          if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
          else parser.attribValue += c
          continue
        }
        attrib(parser)
        if (c === ">") openTag(parser)
        else parser.state = S.ATTRIB
      continue

      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (is(whitespace, c)) continue
          else if (not(nameStart, c)) strictFail(parser,
            "Invalid tagname in closing tag.")
          else parser.tagName = c
        }
        else if (c === ">") closeTag(parser)
        else if (is(nameBody, c)) parser.tagName += c
        else {
          if (not(whitespace, c)) strictFail(parser,
            "Invalid tagname in closing tag")
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
      continue

      case S.CLOSE_TAG_SAW_WHITE:
        if (is(whitespace, c)) continue
        if (c === ">") closeTag(parser)
        else strictFail(parser, "Invalid characters in closing tag")
      continue

      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // the three entity states differ only in where the result goes
        // and which state to resume afterwards
        switch(parser.state) {
          case S.TEXT_ENTITY:
            var returnState = S.TEXT, buffer = "textNode"
          break

          case S.ATTRIB_VALUE_ENTITY_Q:
            var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
          break

          case S.ATTRIB_VALUE_ENTITY_U:
            var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
          break
        }
        if (c === ";") {
          parser[buffer] += parseEntity(parser)
          parser.entity = ""
          parser.state = returnState
        }
        else if (is(entity, c)) parser.entity += c
        else {
          strictFail(parser, "Invalid character entity")
          parser[buffer] += "&" + parser.entity + c
          parser.entity = ""
          parser.state = returnState
        }
      continue

      default:
        throw new Error("Unknown state: " + parser.state)
    }
  } // while
  // cdata blocks can get very big under normal conditions. emit and move on.
  // if (parser.state === S.CDATA && parser.cdata) {
  //   emitNode(parser, "oncdata", parser.cdata)
  //   parser.cdata = ""
  // }
  if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
  return parser
}
1013
1014})(typeof exports === "undefined" ? sax = {} : exports)