1 | var util = require('util');
|
2 | var EventEmitter = require('events').EventEmitter;
|
3 |
|
4 | var sax = require('sax');
|
5 | var ent = require('ent');
|
6 |
|
/**
 * Return an element counted from the end of an array.
 *
 * @param {Array} arr - Array to read from.
 * @param {number} [i] - Offset from the end; a falsy value means 0,
 *     i.e. the final element.
 * @returns {*} The element at that position, or undefined when the
 *     computed index is out of range.
 */
function last (arr, i) {
    var offset = i ? i : 0;
    var index = arr.length - offset - 1;
    return arr[index];
}
|
10 |
|
// Tag names handled as self-closing: the parser glue closes them right
// after they open and ignores any explicit close event for them, and
// readHTML emits no end tag for them.
var CLOSING = [
    'area',
    'base',
    'basefont',
    'br',
    'col',
    'frame',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'param'
];
|
15 |
|
/**
 * Parse a selector string into a flat list of matching steps.
 *
 * Each step is one of:
 *   ['simple', parts]  - a compound selector; `parts` is the token split in
 *                        front of every '.', ':' or '#', e.g. 'p.a#b'
 *                        becomes ['p', '.a', '#b']
 *   ['child']          - the '>' combinator
 *   ['sibling']        - the '~' combinator
 *
 * Two consecutive 'simple' steps stand for a descendant relationship.
 * Combinators may be written with or without surrounding whitespace
 * ('ul > li' and 'ul>li' parse identically).
 *
 * @param {string} text - Selector text.
 * @returns {Array} The list of steps; empty for blank input.
 */
function parseQuery (text) {
    var res = [];

    // Pad combinators with spaces so that "a>b" tokenises like "a > b"
    // instead of yielding the single, never-matching token "a>b".
    var normalized = text.replace(/[>~]/g, ' $& ').trim();
    if (normalized === '') {
        return res;
    }

    normalized.split(/\s+/).forEach(function (token) {
        switch (token) {
        case '>':
            res.push(['child']);
            break;
        case '~':
            res.push(['sibling']);
            break;
        default:
            // Zero-width split: keeps the '.', ':' or '#' marker attached
            // to the part it introduces.
            res.push(['simple', token.split(/(?=[.:#])/)]);
        }
    });
    return res;
}
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
util.inherits(CssQuery, EventEmitter);

/**
 * Streaming matcher for a small CSS selector subset (tag, #id, .class,
 * descendant, '>' child and '~' general sibling) driven by parser events.
 *
 * Listens on `ss` for 'opentag', 'closetag', 'text' and 'end' and
 * re-emits on itself:
 *   'match'    (tag, attributes) - the element satisfies the whole query;
 *                                  emitted before that element's 'opentag'
 *   'opentag'  (tag, attributes)
 *   'closetag' (tag)
 *   'text'     (text)
 *   'end'      ()
 *
 * Parts beginning with ':' are compared against the tag name like a plain
 * tag part and therefore never match.
 *
 * Known limitation: each partial match is advanced in place, so only one
 * candidate is tracked per partial match at a time.
 *
 * @param {string} text - Selector text, parsed with parseQuery.
 * @param {EventEmitter} ss - Event source; 'opentag' handlers receive a
 *     node with `name` and `attributes`, 'closetag' handlers a tag name.
 */
function CssQuery (text, ss) {
    // Set up per-instance emitter state instead of relying on lazy
    // initialisation through the prototype.
    EventEmitter.call(this);

    var query = this;

    var steps = parseQuery(text);

    // One array per partial match. Entry k is the depth recorded when
    // steps[k] was satisfied, so q.length is the index of the next step.
    var state = [];

    // Depth of the element currently being opened (top level is 0).
    var d = 0;

    // True when steps[i] is a compound selector that the element satisfies.
    function isSimpleMatch (tag, attributes, i) {
        if (steps[i] && steps[i][0] === 'simple') {
            return steps[i][1].every(function (part) {
                switch (part[0]) {
                case '#':
                    return attributes.id && attributes.id.trim() === part.substr(1);
                case '.':
                    return attributes.class && attributes.class.trim().split(/\s+/).indexOf(part.substr(1)) !== -1;
                default:
                    return part === tag;
                }
            });
        }
        return false;
    }

    // True when steps[i] is '>' and the element, at depth vd, sits exactly
    // one level below the previously matched element (depth pd) and
    // satisfies the compound selector that follows.
    function isChildMatch (tag, attributes, i, pd, vd) {
        return steps[i] && steps[i][0] === 'child' && pd === vd - 1 && isSimpleMatch(tag, attributes, i + 1);
    }

    // True when steps[i - 1] is '~' and the element, at depth vd, is a
    // direct child of the parent whose depth was stored as the sibling
    // placeholder (parentDepth) and satisfies steps[i].
    function isSiblingMatch (tag, attributes, i, parentDepth, vd) {
        return steps[i - 1] && steps[i - 1][0] === 'sibling' && parentDepth === vd - 1 && isSimpleMatch(tag, attributes, i);
    }

    function pushDepth (tag, attributes) {
        state.forEach(function (q) {
            var i = q.length;
            if (steps[i - 1] && steps[i - 1][0] === 'sibling') {
                // Must be tested before the descendant rule: that rule has
                // no depth constraint and would accept non-siblings.
                if (isSiblingMatch(tag, attributes, i, last(q), d)) {
                    q.push(d);
                }
            } else if (isSimpleMatch(tag, attributes, i)) {
                // Descendant: any depth below the previous match.
                q.push(d);
            } else if (isChildMatch(tag, attributes, i, last(q), d)) {
                // Consumes two steps: the '>' and the compound after it.
                q.push(d);
                q.push(d);
            }
        });
        if (isSimpleMatch(tag, attributes, 0)) {
            // Every element satisfying the first step starts a new
            // partial match.
            state.push([d]);
        }
        state.forEach(function (q) {
            var i = q.length;
            if (steps[i] && steps[i][0] === 'sibling') {
                // Placeholder for the '~' step: the parent's depth. It sits
                // on top of the matched element's entry, so the partial
                // match survives that element closing and is only dropped
                // when the parent closes.
                q.push(d - 1);
            }
        });
        if (state.some(function (q) {
            return q.length === steps.length && last(q) === d;
        })) {
            query.emit('match', tag, attributes);
        }
        d++;

        query.emit('opentag', tag, attributes);
    }

    function popDepth (tag) {
        d--;
        // Discard everything recorded at or below the depth being closed.
        state.forEach(function (q) {
            while (q.length && q[q.length - 1] >= d) {
                q.pop();
            }
        });
        state = state.filter(function (q) {
            return q.length > 0;
        });

        query.emit('closetag', tag);
    }

    ss.on('opentag', function (node) {
        var tag = node.name.toLowerCase();
        pushDepth(tag, node.attributes);

        // Tags in CLOSING are closed immediately after they open.
        if (CLOSING.indexOf(tag) !== -1) {
            popDepth(tag);
        }
    });

    ss.on('closetag', function (tag) {
        tag = tag.toLowerCase();
        // Already closed by the 'opentag' handler above.
        if (CLOSING.indexOf(tag) !== -1) {
            return;
        }

        popDepth(tag);
    });

    ss.on('end', function () {
        query.emit('end');
    });

    ss.on('text', function (text) {
        query.emit('text', text);
    });
}
|
154 |
|
/**
 * Wait until the element about to be opened has been fully closed, then
 * invoke `next`.
 *
 * A counter starting at zero is incremented on every 'opentag' and
 * decremented on every 'closetag'. When a decrement brings it back to
 * zero, both listeners are removed and `next` is called with the query
 * as `this`. This works from a 'match' handler because 'match' is
 * emitted before the matched element's own 'opentag'.
 *
 * @param {Function} next - Called with no arguments once the counter
 *     returns to zero.
 */
CssQuery.prototype.skip = function (next) {
    var depth = 0;

    function enter () {
        depth += 1;
    }

    function leave () {
        depth -= 1;
        if (depth !== 0) {
            return;
        }
        this.removeListener('opentag', enter);
        this.removeListener('closetag', leave);
        next.call(this);
    }

    this.on('opentag', enter);
    this.on('closetag', leave);
};
|
171 |
|
/**
 * Gather every 'text' event emitted until `skip` completes and hand the
 * concatenated result to `next`.
 *
 * @param {Function} next - Called with the query as `this` and the
 *     joined text as its only argument.
 */
CssQuery.prototype.readText = function (next) {
    var chunks = [];

    var collect = function (text) {
        chunks.push(text);
    };

    this.on('text', collect);
    this.skip(function () {
        this.removeListener('text', collect);
        var joined = chunks.join('');
        next.call(this, joined);
    });
};
|
183 |
|
/**
 * Re-serialise the events emitted until `skip` completes into an HTML
 * string and hand it to `next`.
 *
 * Open tags are written with their attributes (values encoded with
 * `ent.encode`), close tags are written for every tag not listed in
 * CLOSING, and text is written with '&', '<' and '>' escaped so that
 * decoded text content cannot turn into markup in the output.
 *
 * @param {Function} next - Called with the query as `this` and the
 *     serialised HTML as its only argument.
 */
CssQuery.prototype.readHTML = function (next) {
    var str = [];

    // Escape the characters that are significant in HTML text content.
    // '&' must be replaced first so the other entities are not re-escaped.
    function escapeText (text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
    }

    function data (text) {
        str.push(escapeText(text));
    }
    function opentag (tag, attributes) {
        str.push('<' + tag + Object.keys(attributes).map(function (key) {
            return ' ' + key + '=' + '"' + ent.encode(attributes[key]) + '"';
        }).join('') + '>');
    }
    function closetag (tag) {
        // Tags in CLOSING get no end tag.
        if (CLOSING.indexOf(tag) === -1) {
            str.push('</' + tag + '>');
        }
    }

    // Registered before skip's own listeners, so the outermost element's
    // open and close tags are recorded before `skip` fires its callback.
    this.addListener('text', data);
    this.addListener('opentag', opentag);
    this.addListener('closetag', closetag);
    this.skip(function () {
        this.removeListener('text', data);
        this.removeListener('opentag', opentag);
        this.removeListener('closetag', closetag);
        next.call(this, str.join(''));
    });
};
|
209 |
|
210 |
|
211 |
|
212 |
|
213 |
|
214 | exports.createStream = function () {
|
215 | var stream = sax.createStream(false, {
|
216 | lowercase: true
|
217 | });
|
218 | stream.query = function (text) {
|
219 | return new CssQuery(text, this);
|
220 | };
|
221 | return stream;
|
222 | }; |
\ | No newline at end of file |