1 | 'use strict';
|
2 | var sax = require('sax'),
|
3 | fs = require('fs'),
|
4 | util = require('util'),
|
5 | events = require('events'),
|
6 | clone = require('clone')
|
7 |
|
8 | ;
|
9 |
|
10 |
|
/**
 * Empties an object (or array) in place by deleting all of its own enumerable
 * properties.
 *
 * Before a property stored under a non-numeric key is deleted, its value is
 * emptied recursively. Properties whose key parses as an integer (array
 * indices, for instance) are deleted without descending into their value.
 *
 * @param {*} o - Object or array to empty. Primitives, null and undefined are ignored.
 * @returns {undefined}
 */
function dispose(o) {
  var type = typeof o;
  // Only objects and functions own deletable properties. Descending into a
  // string would try to delete its read-only index properties, which throws a
  // TypeError in strict mode.
  if (o === null || (type !== 'object' && type !== 'function')) {
    return;
  }
  Object.keys(o).forEach(function (key) {
    if (isNaN(parseInt(key, 10))) {
      dispose(o[key]);
    }
    delete o[key];
  });
}
|
/**
 * Walks an object (or array) in place and deletes every own enumerable
 * property whose value still has at least one own enumerable key.
 *
 * Values stored under non-numeric keys are washed recursively first; values
 * stored under keys that parse as an integer (array indices) are only
 * inspected, not descended into. Deleting an array element leaves a hole and
 * does not change the array length.
 *
 * NOTE(review): the rule removes NON-empty values and keeps empty ones, which
 * looks inverted for a function with this name - confirm the intent before
 * changing it, since the closetag handler relies on the current behaviour.
 *
 * @param {*} o - Object or array to wash. Primitives, null and undefined are ignored.
 * @returns {undefined}
 */
function wash(o) {
  var type = typeof o;
  // Nothing to wash on primitives; descending into a string would end up
  // deleting read-only index properties, which throws in strict mode.
  if (o === null || (type !== 'object' && type !== 'function')) {
    return;
  }
  Object.keys(o).forEach(function (key) {
    var value = o[key];
    if (isNaN(parseInt(key, 10))) {
      wash(value);
    }
    // Object.keys throws on null/undefined, so treat those as having no keys.
    if (value !== null && value !== undefined && Object.keys(value).length > 0) {
      delete o[key];
    }
  });
}
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
/**
 * Tells whether `arr1` is a leading segment of `arr2`, i.e. whether every
 * element of `arr1` is strictly equal to the element of `arr2` at the same
 * position.
 *
 * NOTE(review): the previous implementation looked up the loop counter (not
 * the element) with indexOf in both arrays, so it returned true for almost any
 * input. A positional comparison is assumed to be the intent - confirm.
 *
 * @param {Array} arr1 - Candidate prefix.
 * @param {Array} arr2 - Array expected to start with the elements of `arr1`.
 * @returns {boolean} true when `arr2` starts with all elements of `arr1`
 *   (always true for an empty `arr1`).
 */
function within(arr1, arr2) {
  for (var i = 0; i < arr1.length; i++) {
    if (arr1[i] !== arr2[i]) {
      return false;
    }
  }
  return true;
}
|
46 |
|
/**
 * Compiles an XPath-like selector into a matcher for element paths of the
 * form "/root/child/leaf".
 *
 * The selector is lower-cased and split on "/". Whatever precedes the first
 * "/" is ignored. For every following segment:
 *   - an empty segment (produced by "//" or a trailing "/") matches any number
 *     of whole path segments, including none;
 *   - "*" matches exactly one path segment;
 *   - anything else matches that segment name literally.
 *
 * @param {*} s - Selector string, e.g. "/root/item", "//row" or "/root/*".
 * @returns {{xpath: string, regex: RegExp}|undefined} The lower-cased selector
 *   and its anchored regular expression, or undefined when `s` is not a string.
 */
function string2path(s) {
  if (typeof s !== 'string') {
    return undefined;
  }

  var xpath = s.toLowerCase();
  var pattern = '^';

  xpath.split('/').forEach(function (segment, index) {
    if (index === 0) {
      // Text before the first "/" is not part of the path.
      return;
    }
    if (segment === '') {
      // Descendant axis: consume whole segments only, so that "/a//b" cannot
      // match "/abc/b".
      pattern += '(?:/[^/]+)*';
    }
    else if (segment === '*') {
      pattern += '/[^/]+';
    }
    else {
      // Escape regex metacharacters: names may legitimately contain "." and
      // must not be interpreted as (possibly invalid) regex syntax.
      pattern += '/' + segment.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }
  });
  pattern += '$';

  return {
    xpath: xpath,
    regex: new RegExp(pattern)
  };
}
|
72 |
|
/**
 * Splits an XML document into fragments while it is being parsed.
 *
 * The splitter owns a sax stream (`this.stream`, created with strict = true and
 * the `trim` option). While elements are opened and closed it maintains the
 * current element path and a stack of partially built objects. Every time an
 * element whose path matches one of the knives is closed, a 'data' event is
 * emitted.
 *
 * Events emitted on the splitter:
 *   - 'data'  (node, tag, path): clone of the object built for the closed
 *             element, its tag name with the first ":" replaced by "$", and
 *             its path ("/a/b/c", lower-cased).
 *   - 'end'   (counter): number of 'data' events emitted so far; fired when the
 *             outermost element is closed, and when the sax stream ends while
 *             the stack is not empty.
 *   - 'error' (err): forwarded from the sax stream; the parser error is cleared
 *             and parsing is resumed first.
 *   - 'close' (err): forwarded from the sax stream, or emitted with an Error
 *             when `stream.destroy()` is called.
 *
 * @param {string|string[]} knife - One or several XPath-like selectors (see
 *   string2path). Entries that are not strings are ignored.
 * @param {Object} [opt] - Options. `opt.regular === true` makes the splitter
 *   dispose of each emitted object and wash the stack after emitting it.
 * @throws {Error} 'Invalid Parameter' when no usable selector was given.
 */
function XMLSplitter(knife, opt) {

  if (!(this instanceof XMLSplitter)) {
    // Forward the arguments so that calling without `new` builds the same
    // splitter instead of one without any knife.
    return new XMLSplitter(knife, opt);
  }

  events.EventEmitter.call(this);

  var self = this;
  var tagname = 'row';

  self.stack = [];
  self.path = [];
  self.cdata = '';
  self.knifes = [];
  self.counter = 0;
  self.track = [];
  self.stream = sax.createStream(true, {trim: true});
  self.opt = opt || {};
  self.regular = (self.opt.regular === true);

  // Destroying the underlying stream is reported as a 'close' carrying an Error.
  self.stream.destroy = function () {
    self.emit('close', new Error('The stream seem to be destroyed'));
  };

  // Compile the selectors. string2path yields undefined for non-strings; those
  // are skipped so that the emptiness check below can actually fire and no
  // undefined entry is dereferenced later while matching.
  (Array.isArray(knife) ? knife : [knife]).forEach(function (item) {
    var compiled = string2path(item);
    if (compiled) {
      self.knifes.push(compiled);
    }
  });
  if (self.knifes.length === 0) {
    throw new Error('Invalid Parameter');
  }

  // True when at least one knife matches the given element path.
  function matchesKnife(p) {
    return self.knifes.some(function (k) {
      return k.regex.test(p);
    });
  }

  self.stream.on('close', function (e) {
    self.emit('close', e);
  });

  self.stream.on('error', function (e) {
    // `this` is the sax stream: clear the error and resume so that parsing
    // goes on after the error has been reported.
    this._parser.error = null;
    this._parser.resume();
    self.emit('error', e);
  });
  self.stream.on('processinginstruction', function (pi) {
  });
  self.stream.on('text', function (v) {
    self.cvalue('$t', v);
  });
  self.stream.on('comment', function (v) {
    self.cvalue('$c', v);
  });
  // CDATA may arrive in several chunks: accumulate between open and close.
  self.stream.on('cdata', function (v) {
    self.cdata += v;
  });
  self.stream.on('opencdata', function () {
    self.cdata = '';
  });
  self.stream.on('closecdata', function () {
    self.cvalue('$cd', self.cdata);
    self.cdata = '';
  });
  self.stream.on('opentag', function (node) {
    self.path.push(node.name.toLowerCase());

    var p = '/' + self.path.join('/');

    self.track.push(matchesKnife(p));

    // Neither this element nor any of its open ancestors is selected: push a
    // detached placeholder so that its content is not attached anywhere.
    if (!self.track.some(function (x) { return x; })) {
      self.stack.push({});
      return;
    }

    if (self.stack.length === 1 && node.name === tagname) {
      self.stack.push({});
    }
    else {
      // Attach the attributes object to the parent frame and make it the
      // current frame.
      self.stack.push(self.cvalue(node.name, self.cattr(node.attributes)));
    }
  });
  self.stream.on('closetag', function (tag) {
    var p = '/' + self.path.join('/');
    var l = self.stack.pop();
    var n = tag.replace(':', '$');

    self.track.pop();

    if (matchesKnife(p)) {
      self.emit('data', clone(l, false), n, p);
      if (self.regular) {
        // Release what has just been emitted.
        dispose(l);
        l = null;
        wash(self.stack);
      }
      self.counter++;
    }
    self.path.pop();
    if (self.path.length === 0) {
      self.emit('end', self.counter);
    }
  });
  self.stream.on('ready', function () {
    self.stack.pop();
  });
  self.stream.on('end', function () {
    if (self.stack.length !== 0) {
      self.emit('end', self.counter);
    }
  });
}
util.inherits(XMLSplitter, events.EventEmitter);
|
223 |
|
/**
 * Parses a complete XML document given as a single string by writing it to
 * the underlying sax stream and ending that stream.
 *
 * @param {string} string - XML source.
 * @param {string} [encoding] - Encoding handed to the stream; 'utf8' when falsy.
 * @returns {undefined}
 */
XMLSplitter.prototype.parseString = function (string, encoding) {
  var charset = encoding ? encoding : 'utf8';
  this.stream.end(string, charset);
};
|
228 |
|
/**
 * Parses XML read from a stream by piping it into the underlying sax stream.
 *
 * @param {Object} stream - Source exposing `pipe(destination)`.
 * @returns {undefined}
 */
XMLSplitter.prototype.parseStream = function (stream) {
  var destination = this.stream;
  stream.pipe(destination);
};
|
233 |
|
/**
 * Stores a value under a name in the frame on top of the stack and returns
 * the stored value.
 *
 * The first ":" of the name is replaced by "$". A name seen for the first time
 * holds the value directly; a second value turns the entry into a
 * two-element array; further values are appended to that array. When the
 * stack has no top frame the value is put in a throw-away object.
 *
 * @param {string} n - Property name (tag name, '$t', '$c', '$cd', ...).
 * @param {*} v - Value to store.
 * @returns {*} The value as it is now held by the frame.
 */
XMLSplitter.prototype.cvalue = function (n, v) {
  var key = n.replace(':', '$');
  var frames = this.stack;
  var top = frames[frames.length - 1];

  // No frame to attach to: use a scratch object that is dropped on return.
  if (top === undefined) {
    var scratch = {};
    scratch[key] = v;
    return scratch[key];
  }

  var existing = top[key];

  // First occurrence of this name.
  if (existing === undefined) {
    top[key] = v;
    return top[key];
  }

  // Third and later occurrences: the entry is already a list.
  if (Array.isArray(existing)) {
    var size = existing.push(v);
    return existing[size - 1];
  }

  // Second occurrence: promote the single value to a list.
  top[key] = [existing, v];
  return top[key][1];
};
|
257 |
|
/**
 * Moves the attributes of an element into a fresh object.
 *
 * Every own property of `o` with a truthy value is copied to the result under
 * its name with the first ":" replaced by "$", then deleted from `o`.
 * Properties with a falsy value are left in `o` and are not copied.
 *
 * @param {Object} o - Attribute map; it is modified in place.
 * @returns {Object} New object holding the moved attributes.
 */
XMLSplitter.prototype.cattr = function (o) {
  var attrs = {};
  var name;

  for (name in o) {
    if (!o.hasOwnProperty(name) || !o[name]) {
      continue;
    }
    attrs[name.replace(':', '$')] = o[name];
    delete o[name];
  }

  return attrs;
};
|
269 |
|
// The module's only export is the XMLSplitter constructor.
module.exports = XMLSplitter
|