// Retrieved from UNPKG (file view: 6.08 kB, JavaScript).
'use strict';
// Module dependencies. `sax` and `clone` are third-party packages; the
// others ship with Node.js.
var sax = require('sax'),
    fs = require('fs'),
    util = require('util'),
    events = require('events'),
    clone = require('clone')
//  ,debug = require('debug')('XS')
;
9
10
/**
 * Empties an object in place by deleting all of its own enumerable
 * properties.
 *
 * Values stored under non-numeric keys are emptied recursively first; values
 * stored under numeric keys (array slots, for instance) are only detached
 * from their container, not emptied themselves.
 *
 * Primitives, `null` and `undefined` are ignored. Without that guard the
 * recursion reached string values, and deleting a string's index property
 * throws a TypeError in strict mode.
 *
 * @param {*} o - The value to empty; anything that is not a non-null object
 *   is left untouched.
 * @returns {undefined}
 */
function dispose(o) {
  if (o === null || typeof o !== 'object') {
    return;
  }
  for (var p in o) {
    // Borrow hasOwnProperty so that a key named "hasOwnProperty" cannot shadow it.
    if (Object.prototype.hasOwnProperty.call(o, p)) {
      if (isNaN(parseInt(p, 10))) {
        dispose(o[p]);
      }
      delete o[p];
    }
  }
}
/**
 * Walks an object in place and deletes every own property whose value still
 * has at least one own enumerable key after the walk.
 *
 * Values stored under non-numeric keys are walked recursively before they are
 * inspected; values under numeric keys are inspected only. Applied to an
 * array, this turns every non-empty entry into a hole and keeps the array
 * length unchanged.
 *
 * NOTE(review): the original recursive call passed `d - 1`, where `d` was
 * never declared, so it threw a ReferenceError on the first non-numeric key.
 * The stray argument is removed. Deleting the NON-empty values is kept as
 * written; confirm that this is the intended direction.
 *
 * @param {*} o - The container to clean; anything that is not a non-null
 *   object is left untouched.
 * @returns {undefined}
 */
function wash(o) {
  if (o === null || typeof o !== 'object') {
    return;
  }
  for (var p in o) {
    if (Object.prototype.hasOwnProperty.call(o, p)) {
      var value = o[p];
      if (isNaN(parseInt(p, 10))) {
        wash(value);
      }
      // Object.keys throws on null/undefined, so those are treated as empty.
      if (value !== null && value !== undefined && Object.keys(value).length) {
        delete o[p];
      }
    }
  }
}
33
34
35
36
37
/**
 * For every index `i` of `arr1`, checks that the value `i` (the loop counter
 * itself) is found at the same position in `arr1` and in `arr2`, or is absent
 * from both.
 *
 * NOTE(review): the lookup uses the counter `i`, not the element `arr1[i]`.
 * This looks unintended, but the behaviour is preserved because the expected
 * semantics cannot be determined from this file, where the function is unused.
 *
 * @param {Array} arr1 - Array whose length drives the comparison.
 * @param {Array} arr2 - Array compared against `arr1`.
 * @returns {boolean} `false` at the first index whose positions differ,
 *   `true` otherwise (including when `arr1` is empty).
 */
function within(arr1, arr2) {
  var total = arr1.length;
  for (var i = 0; i < total; i++) {
    if (arr1.indexOf(i) !== arr2.indexOf(i)) {
      return false;
    }
  }
  return true;
}
46
/**
 * Compiles a slash-separated path expression into a matcher.
 *
 * The expression is lower-cased and split on "/". The first segment (the text
 * before the first "/") is always skipped. Each following segment contributes
 * to an anchored regular expression:
 *   - an empty segment (as produced by "//" or a trailing "/") becomes ".*";
 *   - "*" becomes "/[^/]+", i.e. exactly one path level;
 *   - any other segment becomes "/" followed by the segment text.
 *
 * Segment text is inserted into the pattern as-is, so regular-expression
 * metacharacters inside a segment keep their regex meaning.
 *
 * @param {*} s - The path expression, e.g. "/root/*" or "//row".
 * @returns {{xpath: string, regex: RegExp}|undefined} The lower-cased
 *   expression and its compiled pattern, or `undefined` when `s` is not a
 *   string.
 */
function string2path(s)
{
  if (typeof s !== 'string') {
    return undefined;
  }
  var k = {};
  k.xpath = s.toLowerCase();
  var body = k.xpath.split('/').slice(1).map(function (item) {
    if (item === '') {
      return '.*';
    }
    if (item === '*') {
      return '/[^/]+';
    }
    return '/' + item;
  }).join('');
  k.regex = new RegExp('^' + body + '$');
  return k;
}
72
/**
 * Event emitter that feeds XML through a strict SAX stream and emits one
 * object for every element whose path matches one of the given expressions.
 *
 * Events emitted on the instance:
 *   - 'data'  (node, name, path): a clone of the object built for the matched
 *             element, the closing tag name with its first ":" replaced by
 *             "$", and the lower-cased path of the element ("/a/b/c").
 *   - 'end'   (count): the number of 'data' events emitted so far; fired when
 *             the outermost element closes, and again from the stream's own
 *             'end' event if the internal stack is not empty at that time.
 *   - 'error' (err): forwarded from the SAX stream; parsing is resumed.
 *   - 'close' (err): forwarded from the SAX stream, or fired by `destroy`.
 *
 * @param {string|string[]} knife - One path expression or a list of them (see
 *   `string2path` for the accepted form). Entries that are not strings are
 *   ignored.
 * @param {Object} [opt] - Options. `opt.regular === true` releases the
 *   emitted node and the accumulated ancestors after each 'data' event.
 * @throws {Error} 'Invalid Parameter' when no usable path expression is left.
 */
function XMLSplitter(knife, opt) {

  if (!(this instanceof XMLSplitter)) {
    // Forward the arguments: the previous code called `new XMLSplitter()` and
    // silently dropped both of them.
    return new XMLSplitter(knife, opt);
  }

  events.EventEmitter.call(this);


  var self = this;
  var tagname = 'row';

  // Properties
  self.stack = [];
  self.path = [];
  self.cdata = '';
  self.knifes = [];
  self.counter = 0;
  self.track = [];
  self.stream = sax.createStream(true, {trim : true});
  self.opt = opt || {};
  self.regular = self.opt.regular === true;

  // Adding this function avoids this exception:
  //
  // stream.js:74
  //     dest.destroy();
  //          ^
  // TypeError: Object #<SAXStream> has no method 'destroy'
  //     at IncomingMessage.onclose (stream.js:74:10)
  //     at IncomingMessage.EventEmitter.emit (events.js:115:20)
  //     at abortIncoming (http.js:1641:11)
  //     at Socket.serverSocketCloseListener (http.js:1651:5)
  //     at Socket.EventEmitter.emit (events.js:115:20)
  //     at Socket._destroy.destroyed (net.js:358:10)
  //     at process.startup.processNextTick.process._tickCallback (node.js:244:9)
  self.stream.destroy = function () {
    self.emit('close', new Error('The stream seem to be destroyed'));
  };

  // Polymorphism: accept a single expression or an array of expressions.
  // string2path returns undefined for non-strings; such entries are skipped so
  // that the guard below really detects the absence of any usable expression
  // (they used to be stored and crashed the first 'opentag' handler).
  (Array.isArray(knife) ? knife : [knife]).forEach(function (item) {
    var compiled = string2path(item);
    if (compiled !== undefined) {
      self.knifes.push(compiled);
    }
  });
  if (self.knifes.length === 0) {
    throw new Error('Invalid Parameter');
  }

  // True when at least one compiled expression matches the given path.
  function matchesKnife(p) {
    return self.knifes.some(function (k) {
      return k.regex.test(p);
    });
  }

  // Stream's handle
  self.stream.on('close', function (e) {
    self.emit('close', e);
  });

  // Sax's handles
  self.stream.on('error', function (e) {
    // `this` is the SAX stream: clear the parser error and resume so that
    // parsing continues after the error has been reported.
    this._parser.error = null;
    this._parser.resume();
    self.emit('error', e);
  });
  self.stream.on('processinginstruction', function (pi) {
  });
  self.stream.on('text', function (v) {
    self.cvalue('$t', v);
  });
  self.stream.on('comment', function (v) {
    self.cvalue('$c', v);
  });
  self.stream.on('cdata', function (v) {
    self.cdata += v;
  });
  self.stream.on('opencdata', function () {
    self.cdata = '';
  });
  self.stream.on('closecdata', function () {
    self.cvalue('$cd', self.cdata);
    self.cdata = '';
  });
  self.stream.on('opentag', function (node) {
    self.path.push(node.name.toLowerCase());

    // One flag per open element: does its path match an expression?
    self.track.push(matchesKnife('/' + self.path.join('/')));

    // avoids memory inflation: outside of any matching element, push a
    // placeholder that is not attached to its parent.
    if (!self.track.some(function (x) { return x; })) {
      self.stack.push({});
      return;
    }

    if (self.stack.length === 1 && node.name === tagname) {
      self.stack.push({});
    }
    else {
      // Attach the element (seeded with its attributes) to its parent and
      // make it the current object.
      self.stack.push(self.cvalue(node.name, self.cattr(node.attributes)));
    }

  });
  self.stream.on('closetag', function (tag) {
    var p = '/' + self.path.join('/');
    var l = self.stack.pop();
    var n = tag.replace(':', '$');

    self.track.pop();

    if (matchesKnife(p)) {
      // Listeners receive a clone, so the cleanup below cannot affect them.
      self.emit('data', clone(l, false), n, p);
      if (self.regular) {
        dispose(l);
        l = null;
        wash(self.stack);
      }
      self.counter++;
    }
    self.path.pop();
    if (self.path.length === 0) {
      self.emit('end', self.counter);
    }
  });
  self.stream.on('ready', function () {
    self.stack.pop();
  });
  self.stream.on('end', function () {
    if (self.stack.length !== 0) {
      self.emit('end', self.counter);
    }
  });
}
util.inherits(XMLSplitter, events.EventEmitter);
223
/**
 * Parses a complete XML document held in memory by writing it to the
 * underlying SAX stream and ending that stream.
 *
 * @param {string|Buffer} string - The XML source.
 * @param {string} [encoding='utf8'] - Encoding passed along with the data.
 * @returns {undefined}
 */
XMLSplitter.prototype.parseString = function (string, encoding) {
  this.stream.end(string, encoding || 'utf8');
};
228
/**
 * Parses XML coming from a readable stream by piping it into the underlying
 * SAX stream.
 *
 * @param {Object} stream - A source exposing `pipe(destination)`.
 * @returns {undefined}
 */
XMLSplitter.prototype.parseStream = function (stream) {
  stream.pipe(this.stream);
};
233
/**
 * Stores a value under a name on the object at the top of the stack and
 * returns the stored value.
 *
 * The first ":" of the name is replaced by "$". When the name is seen for the
 * first time the value is stored directly; on the second occurrence the slot
 * becomes an array of both values; later occurrences are appended to it.
 *
 * When the top of the stack is `undefined` (empty stack, or an entry that was
 * removed) nothing is stored and the value is simply returned.
 *
 * Only OWN properties count as "already present", so names that collide with
 * members inherited from Object.prototype ("toString", "constructor", ...)
 * are stored like any other name instead of being paired with the inherited
 * function.
 *
 * @param {string} n - Property name (tag name, "$t", "$c", "$cd", ...).
 * @param {*} v - Value to store.
 * @returns {*} The value `v` as stored.
 */
XMLSplitter.prototype.cvalue = function (n, v) {
  var key = n.replace(':', '$');
  var top = this.stack[this.stack.length - 1];

  if (top === undefined) {
    return v;
  }
  if (!Object.prototype.hasOwnProperty.call(top, key) || top[key] === undefined) {
    top[key] = v;
    return top[key];
  }
  if (!Array.isArray(top[key])) {
    top[key] = [top[key], v];
    return top[key][1];
  }
  var size = top[key].push(v);
  return top[key][size - 1];
};
257
/**
 * Moves the truthy own attributes of a node into a new object.
 *
 * In each copied name the first ":" is replaced by "$". Every copied entry is
 * deleted from the input object; entries with a falsy value (an empty string,
 * for instance) are neither copied nor deleted.
 *
 * hasOwnProperty is borrowed from Object.prototype so that an attribute that
 * is itself named "hasOwnProperty" cannot break the check.
 *
 * @param {Object} o - Attribute map (name to value); it is mutated.
 * @returns {Object} A new object holding the moved attributes.
 */
XMLSplitter.prototype.cattr = function (o) {
  var r = {};
  for (var key in o) {
    if (Object.prototype.hasOwnProperty.call(o, key) && o[key]) {
      r[key.replace(':', '$')] = o[key];
      delete o[key];
    }
  }
  return r;
};
269
// Public API: the XMLSplitter constructor.
module.exports = XMLSplitter;