1 | var ElementType = require("./ElementType.js");
|
2 |
|
/**
 * Incremental tag parser that reports what it finds through callbacks.
 *
 * @param {Object} [cbs] Callback object; when omitted, the `_cbs` found on
 *   the prototype chain stays in effect.
 * @param {Object} [options] Option object; when omitted, the `_options`
 *   found on the prototype chain stays in effect.
 */
function Parser(cbs, options){
	// Only shadow the prototype defaults when a value was actually supplied.
	// reset() re-runs this constructor with no arguments and relies on the
	// previously installed options/callbacks being left untouched.
	if(options){
		this._options = options;
	}
	if(cbs){
		this._cbs = cbs;
	}

	// Per-document state, (re)initialised on every construction or reset.
	this._buffer = "";       // input received but not yet consumed
	this._prevTagSep = "";   // last "<" or ">" seen while inside special content
	this._stack = [];        // names of the currently open tags
	this._contentFlags = 0;  // sum of the SpecialTags flags currently active
	this._done = false;      // set once done() has run
	this._parseState = ElementType.Text; // kind of chunk the next separator ends
}
|
14 |
|
15 |
|
16 |
|
// Matches the start of a tag's content: optional leading whitespace, an
// optional "/" (group 1), optional whitespace, then the tag name (group 2),
// which is the run of characters up to the next whitespace or "/".
var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/;

// Matches one attribute per exec() call (global flag). Four alternatives:
//   groups 1/2: name="value"  (double-quoted value, may be empty)
//   groups 3/4: name='value'  (single-quoted value, may be empty)
//   groups 5/6: name=value    (unquoted; value runs to the next quote or whitespace)
//   group  7:   name          (no value; the name may not contain "/")
var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
|
19 |
|
// Default options, shared by all instances through the prototype. An options
// object passed to the constructor replaces this object as a whole; the two
// are not merged.
Parser.prototype._options = {
	// When true, the script/style special handling, the empty-tag list and
	// the "</br>" rewrite are all skipped.
	xmlMode: false,
	// When true, _parseTagName lower-cases the tag names it returns.
	lowerCaseTags: false
};
|
24 |
|
// Default callback set (empty), shared through the prototype. Every callback
// is optional; the ones this file invokes when present are:
//   onopentag(name, attribs, type)
//   onclosetag(name)
//   ontext(data)
//   oncomment(data)
//   onprocessinginstruction(name, data)
//   onend()
//   onreset()
//   onerror(error)
Parser.prototype._cbs = {
};
|
34 |
|
35 |
|
36 |
|
37 |
|
/**
 * Parses a complete document in one call: resets the parser state, feeds
 * `data` as a single chunk and then finishes the parse.
 *
 * @param {string} data The whole document.
 */
Parser.prototype.parseComplete = function(data){
	this.reset();
	this.parseChunk(data);
	this.done();
};
|
43 |
|
44 |
|
/**
 * Appends a chunk of input to the internal buffer and parses as much of it
 * as possible. Also exposed as `write`.
 *
 * Calling this after done() is an error: it is reported through
 * _handleError (which calls `onerror`, or throws when no such callback is
 * installed) and the chunk is discarded.
 *
 * @param {string} data The next piece of the document.
 */
Parser.prototype.write =
Parser.prototype.parseChunk = function(data){
	if(this._done){
		this._handleError(new Error("Attempted to parse chunk after parsing already done"));
		// When the error went to `onerror` instead of being thrown, stop here:
		// done() has already fired `onend` and will never flush the buffer
		// again, so parsing this chunk would emit events after the end.
		return;
	}
	this._buffer += data;
	this._parseTags();
};
|
51 |
|
52 |
|
/**
 * Finishes the parse. Flushes whatever is still buffered, reports every tag
 * that is still open as closed (innermost first) and finally fires `onend`.
 * Calling it again after the first time does nothing.
 */
Parser.prototype.done = function(){
	if(this._done){
		return;
	}
	this._done = true;

	// Force the parser to consume the unterminated tail of the input.
	if(this._buffer){
		this._parseTags(true);
	}

	// Without an `onclosetag` listener the stack is simply left as it is.
	if(this._cbs.onclosetag){
		while(this._stack.length > 0){
			this._cbs.onclosetag(this._stack.pop());
		}
	}

	if(this._cbs.onend){
		this._cbs.onend();
	}
};
|
66 |
|
67 |
|
/**
 * Returns the parser to its initial state and then fires `onreset`.
 */
Parser.prototype.reset = function(){
	// Re-run the constructor without arguments: the per-parse fields are
	// reinitialised while the installed callbacks and options are kept.
	Parser.call(this);
	if(this._cbs.onreset) this._cbs.onreset();
};
|
72 |
|
73 |
|
74 |
|
/**
 * Extracts the attributes from the content of a tag.
 *
 * Everything from the first whitespace character onwards is scanned with
 * `_reAttrib`. An attribute written without a value gets its own name as its
 * value; when a name occurs more than once the last occurrence wins.
 *
 * @param {string} data Tag content (name followed by the attributes).
 * @returns {Object} Map of attribute name to value; empty when `data`
 *   contains no whitespace.
 */
var parseAttributes = function(data){
	var firstSpace = data.search(/\s/);
	var attrs = {};
	if(firstSpace === -1){
		return attrs;
	}

	var source = data.substring(firstSpace);
	var found, key, value;

	// _reAttrib is a shared global regex, so restart it for every tag.
	_reAttrib.lastIndex = 0;

	while((found = _reAttrib.exec(source)) !== null){
		if(found[1]){          // name="value"
			key = found[1];
			value = found[2];
		}
		else if(found[3]){     // name='value'
			key = found[3];
			value = found[4];
		}
		else if(found[5]){     // name=value
			key = found[5];
			value = found[6];
		}
		else{                  // bare name
			key = found[7];
			value = found[7];
		}
		if(key){
			attrs[key] = value;
		}
	}

	return attrs;
};
|
92 |
|
93 |
|
/**
 * Extracts the tag name from the content of a tag.
 *
 * @param {string} data Tag content.
 * @returns {string} The name, prefixed with "/" for a closing tag and
 *   lower-cased when the `lowerCaseTags` option is set; "" when `data` does
 *   not start with a name.
 */
Parser.prototype._parseTagName = function(data){
	var parts = data.match(_reTagName);
	if(parts === null){
		return "";
	}

	var slash = parts[1];
	var name = this._options.lowerCaseTags ? parts[2].toLowerCase() : parts[2];
	return slash + name;
};
|
102 |
|
103 |
|
// Flag values that are added to / removed from `_contentFlags`. Because each
// value is a distinct power of two (or zero), a `>=` comparison tells whether
// that flag or a higher one is set, and `%=` clears a flag together with all
// higher ones.
var SpecialTags = {};
SpecialTags[ElementType.Tag] = 0; // ordinary tags contribute nothing
SpecialTags[ElementType.Style] = 1; // inside an open style tag
SpecialTags[ElementType.Script] = 2; // inside an open script tag
SpecialTags.w = 4; // text was already emitted inside a special tag; later chunks get the previous separator prepended
SpecialTags[ElementType.Comment] = 8; // inside a comment
|
110 |
|
111 |
|
/**
 * Consumes the buffer chunk by chunk, where a chunk is the text between two
 * consecutive separators ("<" or ">"), and dispatches each chunk to the
 * matching handler/callback. Whatever follows the last separator stays in
 * `_buffer` for the next call.
 *
 * @param {boolean} [force] When truthy, the unterminated tail of the buffer
 *   is processed as a final chunk as well (used by done()).
 */
Parser.prototype._parseTags = function(force){
	// `current` is the index just past the last separator consumed so far.
	var buffer = this._buffer, current = 0;

	var next, tagSep, rawData, elementName, elementType, elementData;

	// Positions of the next unconsumed "<" and ">" (-1 when there is none).
	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");

	// When forcing, pretend there is a "<" at Infinity: the loop then runs
	// until a chunk reaching to the end of the buffer has been handled.
	if(force) opening = Infinity;

	// Both positions are equal only when both are -1: no separator is left.
	while(opening !== closing){
		// Take whichever separator comes first and look up its successor.
		if((opening !== -1 && opening < closing) || closing === -1){
			next = opening;
			tagSep = "<";
			opening = buffer.indexOf(tagSep, next + 1);
		}
		else{
			next = closing;
			tagSep = ">";
			closing = buffer.indexOf(tagSep, next + 1);
		}
		rawData = buffer.substring(current, next);
		// The state left by the previous separator tells what this chunk is.
		elementType = this._parseState;

		current = next + 1;
		// What follows a "<" is tag content, what follows a ">" is text.
		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;

		if(elementType === ElementType.Tag){
			elementData = rawData.trim();
			elementName = this._parseTagName(elementData);
		}
		else{
			elementData = rawData;
			elementName = "";
		}

		// Special content (comment / script / style) is handled first.
		if(this._contentFlags === 0){ }
		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
			// Inside a comment: the chunk is comment data.
			this._processComment(rawData, tagSep);
			continue;
		}
		else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
			// Closing script tag: clear the script flag (and the higher ones),
			// then fall through so the tag is processed as a normal close tag.
			this._contentFlags %= SpecialTags[ElementType.Script];
		}
		else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "/style"){
			// Closing style tag: same as above for the style flag.
			this._contentFlags %= SpecialTags[ElementType.Style];
		}
		else if(!this._options.xmlMode && rawData.substring(0, 3) !== "!--"){
			// Anything else inside script/style (outside XML mode) that does
			// not start a comment is emitted as text.
			if(this._contentFlags >= SpecialTags.w){
				// Text was emitted before: put back the separator that was
				// swallowed between the previous chunk and this one.
				if(this._cbs.ontext) this._cbs.ontext(this._prevTagSep + rawData);
			}
			else{
				// First text chunk inside the special tag.
				this._contentFlags += SpecialTags.w;
				if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
			}
			this._prevTagSep = tagSep;
			continue;
		}

		// Regular processing.
		if(elementType === ElementType.Tag){
			if(rawData.substring(0, 3) === "!--"){
				// Comment start: set the comment flag and hand over what
				// follows the "!--" marker.
				this._contentFlags += SpecialTags[ElementType.Comment];
				this._processComment(rawData.substr(3), tagSep);
				continue;
			}

			if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
				// Tags starting with "!" or "?" are reported as processing
				// instructions and never touch the tag stack.
				if(this._cbs.onprocessinginstruction){
					this._cbs.onprocessinginstruction(elementName, elementData);
				}
				continue;
			}
			if(elementName.charAt(0) === "/") this._processCloseTag(elementName.substr(1));
			else this._processOpenTag(elementName, elementData, tagSep);
		}
		else if(elementType === ElementType.Text && rawData !== "" && this._cbs.ontext){
			this._cbs.ontext(elementData);
		}
	}

	// Keep the part after the last consumed separator for the next call.
	this._buffer = buffer.substring(current);
};
|
207 |
|
/**
 * Handles one chunk of comment data and passes it to `oncomment`.
 *
 * A chunk that was terminated by ">" and ends in "--" closes the comment:
 * the trailing "--" is dropped and the comment flag is cleared. Any other
 * chunk is comment content, so the separator that terminated it is appended
 * again before it is reported.
 *
 * @param {string} rawData The chunk (without the terminating separator).
 * @param {string} tagSep The separator ("<" or ">") that ended the chunk.
 */
Parser.prototype._processComment = function(rawData, tagSep){
	var text;

	this._prevTagSep = tagSep;

	var closesComment = tagSep === ">" && rawData.slice(-2) === "--";
	if(closesComment){
		// Clearing everything from the `w` flag upwards also removes the
		// comment flag.
		this._contentFlags %= SpecialTags.w;
		text = rawData.slice(0, -2);
	}
	else{
		text = rawData + tagSep;
	}

	if(this._cbs.oncomment){
		this._cbs.oncomment(text);
	}
};
|
220 |
|
221 | var emptyTags = require("./ClosingTags.js").self;
|
222 |
|
/**
 * Tells whether `name` is listed in `emptyTags`, i.e. is a tag that never
 * has content or a closing tag. Always false in XML mode.
 *
 * @param {string} name Tag name.
 * @returns {*} `false` when the tag is not an empty tag, otherwise the
 *   (truthy) entry stored for it in `emptyTags`.
 */
Parser.prototype._isEmptyTag = function(name){
	if(this._options.xmlMode){
		return false;
	}
	// Look at own properties only. A plain `emptyTags[name]` also finds the
	// members inherited from Object.prototype, so a tag named "constructor"
	// or "toString" would wrongly be treated as an empty tag.
	return Object.prototype.hasOwnProperty.call(emptyTags, name) && emptyTags[name];
};
|
226 |
|
/**
 * Handles a closing tag.
 *
 * The innermost open tag called `name` is closed together with every tag
 * opened after it; a closing tag without a matching open tag is ignored.
 * Closing tags of empty tags are ignored as well, except "</br>", which
 * outside XML mode is reported like a self-closing "<br/>".
 *
 * @param {string} name Tag name without the leading "/".
 */
Parser.prototype._processCloseTag = function(name){
	if(!this._stack || this._isEmptyTag(name)){
		if(name === "br" && !this._options.xmlMode){
			this._processOpenTag(name, "/");
		}
		return;
	}

	// Position of the innermost open tag with this name.
	var pos = this._stack.lastIndexOf(name);
	if(pos === -1){
		return;
	}

	if(this._cbs.onclosetag){
		// Report the closed tags one by one, innermost first.
		while(pos < this._stack.length){
			this._cbs.onclosetag(this._stack.pop());
		}
	}
	else{
		// Nobody is listening: drop them all at once.
		this._stack.splice(pos);
	}
};
|
242 |
|
/**
 * Handles an opening tag: reports it through `onopentag` and then either
 * closes it right away (self-closing or empty tag) or pushes it on the
 * stack of open tags.
 *
 * @param {string} name Tag name.
 * @param {string} data Full tag content (name plus attributes).
 * @param {string} [tagSep] The separator that terminated the tag; stored as
 *   the previous separator when the tag stays open.
 */
Parser.prototype._processOpenTag = function(name, data, tagSep){
	// Outside XML mode, script and style tags get a type of their own.
	var type;
	if(this._options.xmlMode){
		type = ElementType.Tag;
	}
	else if(name === "script"){
		type = ElementType.Script;
	}
	else if(name === "style"){
		type = ElementType.Style;
	}
	else{
		type = ElementType.Tag;
	}

	if(this._cbs.onopentag){
		this._cbs.onopentag(name, parseAttributes(data), type);
	}

	// A trailing "/" or a known empty tag means there is no content.
	var closesImmediately = data.charAt(data.length - 1) === "/" || this._isEmptyTag(name);
	if(!closesImmediately){
		this._contentFlags += SpecialTags[type];
		this._stack.push(name);
		this._prevTagSep = tagSep;
		return;
	}

	if(this._cbs.onclosetag){
		this._cbs.onclosetag(name);
	}
};
|
262 |
|
/**
 * Reports an error: passes it to the `onerror` callback when one is
 * installed and throws it otherwise.
 *
 * @param {Error} error The error to report.
 * @throws {Error} `error` itself, when there is no `onerror` callback.
 */
Parser.prototype._handleError = function(error){
	if(!this._cbs.onerror){
		throw error;
	}
	this._cbs.onerror(error);
};
|
268 |
|
269 | module.exports = Parser; |
\ | No newline at end of file |