UNPKG

7.35 kBJavaScriptView Raw
1'use strict'
2
3// pickup - transform RSS or Atom XML to JSON
4
5exports = module.exports = Pickup
6
7const StringDecoder = require('string_decoder').StringDecoder
8const attribute = require('./lib/attribute')
9const debug = require('util').debuglog('pickup')
10const mappings = require('./lib/mappings')
11const os = require('os')
12const sax = require('sax')
13const stream = require('readable-stream')
14const util = require('util')
15
// Table from element name to the handler invoked when that element opens.
// `channel` and `feed` share the feed handler, `item` and `entry` share the
// entry handler, and `image` has its own.
//
// - t: object providing `feedopen`, `entryopen`, and `imageopen`.
function OpenHandlers (t) {
  const { feedopen, entryopen, imageopen } = t

  this.channel = feedopen
  this.feed = feedopen
  this.item = entryopen
  this.entry = entryopen
  this.image = imageopen
}
23
// Table from element name to the handler invoked when that element closes.
// `channel` and `feed` share the feed handler, `item` and `entry` share the
// entry handler, and `image` has its own.
//
// - t: object providing `feedclose`, `entryclose`, and `imageclose`.
function CloseHandlers (t) {
  const onFeed = t.feedclose
  const onEntry = t.entryclose
  const onImage = t.imageclose

  Object.assign(this, {
    channel: onFeed,
    feed: onFeed,
    item: onEntry,
    entry: onEntry,
    image: onImage
  })
}
31
// Plain record of the three parser options used by this module; each argument
// is stored under the property of the same name.
function Opts (trim, normalize, position) {
  Object.assign(this, { trim, normalize, position })
}
37
// Maps a charset description to an encoding name: 'binary' when the string
// mentions iso-8859-1 (and not utf-8), 'utf8' in every other case. Matching
// is case-insensitive and utf-8 takes precedence.
//
// - str: string to inspect; anything without a `search` method throws.
function encodingFromString (str) {
  const mentionsUtf8 = str.search(/utf-8/i) !== -1
  if (!mentionsUtf8 && str.search(/iso-8859-1/i) !== -1) {
    return 'binary'
  }
  return 'utf8'
}
46
// Resolves the encoding requested through `opts.charset`. Returns null when
// `opts` is falsy or `charset` is not a string, so callers can tell "not
// configured" apart from a resolved encoding.
function encodingFromOpts (opts) {
  const charset = opts ? opts.charset : null
  return typeof charset === 'string' ? encodingFromString(charset) : null
}
52
// Options object shared by every parser this module creates.
const saxOpts = new Opts(
  true, // trim
  true, // normalize
  false // position
)
54
util.inherits(Pickup, stream.Transform)

// Transform stream that feeds the text written to it into a sax parser and
// collects mapped elements, text, and attributes into Feed and Entry objects.
// Callable with or without `new`.
//
// - opts: optional; passed through to stream.Transform. `opts.charset`
//   (string) preselects the input encoding, and a truthy `opts.eventMode`
//   makes finished feeds/entries get emitted as events instead of pushed.
function Pickup (opts) {
  if (!(this instanceof Pickup)) return new Pickup(opts)
  stream.Transform.call(this, opts)

  // The handler tables are shared by all instances and built on first use.
  if (!Pickup.openHandlers) {
    Pickup.openHandlers = new OpenHandlers(Pickup.prototype)
    Pickup.closeHandlers = new CloseHandlers(Pickup.prototype)
  }

  this.encoding = encodingFromOpts(opts)
  this.decoder = new StringDecoder(this.encoding)

  this.eventMode = opts && opts.eventMode
  this.map = null
  this.parser = sax.parser(true, saxOpts)
  this.state = new State()

  const parser = this.parser

  parser.ontext = (t) => {
    const current = this.current()
    const map = this.map
    const state = this.state
    const name = state.name

    if (!current || !map) return

    let key = map.get(name)
    if (key === undefined) return

    // A `url` element inside an open image element sets the image property.
    if (state.image && name === 'url') key = 'image'

    // First wins, except 'content:encoded' summary of reasonable length.
    if (current[key] !== undefined) {
      const shouldOverride = key === 'summary' &&
        name === 'content:encoded' &&
        t.length < 4096
      if (!shouldOverride) return

      debug('overriding %s with %s', key, name)
    }

    current[key] = t
  }

  // CDATA sections are treated exactly like text.
  parser.oncdata = (d) => {
    parser.ontext(d)
  }

  // Runs the handler registered for `name`, if any, with this stream as `this`.
  const handle = (name, handlers) => {
    if (handlers.hasOwnProperty(name)) {
      handlers[name].apply(this)
    }
  }

  parser.onopentag = (node) => {
    const name = node.name
    this.state.name = name

    // Element names come from the document, so a name such as `constructor`
    // would otherwise pick up an inherited Object.prototype member here.
    // Only switch to values that offer the `get` lookup used below.
    const candidate = mappings[name]
    if (candidate && typeof candidate.get === 'function') {
      this.map = candidate
    }

    handle(name, Pickup.openHandlers)

    const current = this.current()
    // Like ontext, do nothing while no mapping has been selected; an entry
    // or feed can be open before any mapped element has been seen.
    if (!current || !this.map) return

    const key = this.map.get(name)
    if (!key) return

    const attributes = node.attributes
    if (Object.keys(attributes).length === 0) return

    const kv = attribute(key, attributes, current)
    if (kv) {
      current[kv[0]] = kv[1]
    }
  }

  parser.onclosetag = (name) => {
    handle(name, Pickup.closeHandlers)
    // Text arriving after a close tag belongs to no named element.
    this.state.name = null
  }
}
145
// Returns the object currently being populated: the open entry if there is
// one, otherwise the open feed (which may be unset).
Pickup.prototype.current = function () {
  const { entry, feed } = this.state
  return entry || feed
}
149
// Reports the `objectMode` flag of this stream's readable state.
Pickup.prototype.objectMode = function () {
  const { objectMode } = this._readableState
  return objectMode
}
153
// Open handler for feed elements: starts a fresh Feed. A feed that is still
// open at this point is logged as nested and then replaced.
Pickup.prototype.feedopen = function () {
  const state = this.state
  const previous = state.feed
  if (previous) debug('nested feed: ', previous)
  state.feed = new Feed()
}
159
// Open handler for entry elements: starts a fresh Entry. An entry that is
// still open at this point is logged as nested and then replaced.
Pickup.prototype.entryopen = function () {
  const state = this.state
  const previous = state.entry
  if (previous) debug('nested entry: ', previous)
  state.entry = new Entry()
}
165
// Open handler for image elements: flags that the parser is inside an image.
Pickup.prototype.imageopen = function () {
  const { state } = this
  state.image = true
}
169
// Close handler for entry elements. Hands the finished entry over and clears
// it from the state; does nothing when no entry is open.
//
// In event mode the entry is emitted as an 'entry' event. Otherwise it is
// pushed downstream: as the object itself in object mode, or as one line of
// JSON terminated by os.EOL.
Pickup.prototype.entryclose = function () {
  const finished = this.state.entry
  if (!finished) return

  if (this.eventMode) {
    this.emit('entry', finished)
  } else {
    const payload = this.objectMode()
      ? finished
      : JSON.stringify(finished) + os.EOL
    this.push(payload)
  }

  this.state.entry = null
}
185
// Close handler for feed elements. Hands the finished feed over and clears
// it from the state; does nothing when no feed is open.
//
// In event mode the feed is emitted as a 'feed' event. Otherwise it is
// pushed downstream: as the object itself in object mode, or as one line of
// JSON terminated by os.EOL.
Pickup.prototype.feedclose = function () {
  const finished = this.state.feed
  if (!finished) return

  if (this.eventMode) {
    this.emit('feed', finished)
  } else {
    const payload = this.objectMode()
      ? finished
      : JSON.stringify(finished) + os.EOL
    this.push(payload)
  }

  this.state.feed = null
}
201
// Close handler for image elements: clears the inside-an-image flag.
Pickup.prototype.imageclose = function () {
  const { state } = this
  state.image = false
}
205
// Detaches the four callbacks this module installs on a parser by setting
// each of them to null. Other properties of the parser are left alone.
function free (parser) {
  const installed = ['oncdata', 'onclosetag', 'onopentag', 'ontext']
  for (const handlerName of installed) {
    parser[handlerName] = null
  }
}
212
// Transform flush hook: closes the parser and drops every reference this
// stream holds so the parsing state can be reclaimed, then signals completion.
//
// - cb: completion callback; always invoked without an error.
Pickup.prototype._flush = function (cb) {
  // Handlers are detached first, so nothing is dispatched while closing.
  free(this.parser)
  this.parser.close()

  // The constructor stores the decoder as `decoder`; release it under that
  // name as well as under `_decoder`, so no decoder outlives the stream.
  this.decoder = null
  this._decoder = null

  this.encoding = null
  this.map = null
  this.parser = null

  this.state.deinit()
  this.state = null

  cb()
}
228
// Guesses the encoding from the start of a document. Looks at the text that
// follows the first occurrence of 'encoding' (up to the next occurrence, if
// any); when that text begins with '=' after trimming, it is classified by
// encodingFromString. In all other cases the result is 'utf8'.
function cribEncoding (str) {
  const fallback = 'utf8'
  const [, tail] = str.split('encoding')

  if (tail && tail.trim().charAt(0) === '=') {
    return encodingFromString(tail)
  }
  return fallback
}
238
// Transform hook: decodes one chunk and writes the text to the parser.
//
// On the first chunk the input encoding is settled (taken from the options,
// or else cribbed from the start of the chunk), the decoder is built for that
// encoding, and a single 'encoding' event is emitted.
//
// - chunk: incoming data; read with Buffer-style `toString(enc, start, end)`.
// - enc: unused.
// - cb: called with the parser's error, if the write produced one.
Pickup.prototype._transform = function (chunk, enc, cb) {
  if (!this._decoder) {
    if (!this.encoding) {
      // This, of course, fails--yielding 'utf8'--if the first chunk is too
      // short to contain the encoding tag.
      const t = chunk.toString('ascii', 0, 128)
      this.encoding = cribEncoding(t)
    }
    // Build the decoder only now that the encoding is known; a decoder made
    // before detection would ignore the detected encoding. Assigning
    // `_decoder` also ensures this setup runs for the first chunk only.
    this._decoder = new StringDecoder(this.encoding)
    this.decoder = this._decoder
    this.emit('encoding', this.encoding)
  }
  const str = this._decoder.write(chunk)
  const er = this.parser.write(str).error
  // Reset the error so the parser accepts further writes.
  this.parser.error = null
  cb(er)
}
254
// Record for a single feed entry. Every property is always assigned, in a
// fixed order, so entries share one shape and serialize with a stable key
// order; arguments that are not passed leave their property undefined.
//
// `feed` is the last parameter so that existing positional callers keep
// working. It must be a parameter: a bare `feed` here would otherwise resolve
// to the module-level `feed()` helper and store that function on each entry.
function Entry (
  author
, duration
, enclosure
, id
, image
, link
, originalURL
, subtitle
, summary
, title
, updated
, url
, feed) {
  this.author = author
  this.duration = duration
  this.enclosure = enclosure
  this.feed = feed
  this.id = id
  this.image = image
  this.link = link
  this.originalURL = originalURL
  this.subtitle = subtitle
  this.summary = summary
  this.title = title
  this.updated = updated
  this.url = url
}
282
// Record for feed-level metadata. Every property is always assigned, in a
// fixed order, so feeds share one shape and serialize with a stable key
// order; arguments that are not passed leave their property undefined.
function Feed (
  author,
  copyright,
  id,
  image,
  language,
  link,
  originalURL,
  payment,
  subtitle,
  summary,
  title,
  ttl,
  updated,
  url
) {
  Object.assign(this, {
    author,
    copyright,
    id,
    image,
    language,
    link,
    originalURL,
    payment,
    subtitle,
    summary,
    title,
    ttl,
    updated,
    url
  })
}
313
// Mutable parsing state: the open entry, the open feed, whether the parser
// is inside an image element, and the name of the element being read.
function State (entry, feed, image, name) {
  Object.assign(this, { entry, feed, image, name })
}

// Resets the state: entry and feed become null, the image flag false, and
// the element name undefined.
State.prototype.deinit = function () {
  Object.assign(this, {
    entry: null,
    feed: null,
    image: false,
    name: undefined
  })
}
327
// Copies the own enumerable properties of `add` onto `origin` and returns
// `origin`. A falsy `add` copies nothing.
function extend (origin, add) {
  const source = add || Object.create(null)
  return Object.assign(origin, source)
}
// Builds an Entry and copies the properties of `obj`, if given, onto it.
function entry (obj) {
  const blank = new Entry()
  return extend(blank, obj)
}
// Builds a Feed and copies the properties of `obj`, if given, onto it.
function feed (obj) {
  const blank = new Feed()
  return extend(blank, obj)
}
337
338if (process.mainModule.filename.match(/test/) !== null) {
339 exports.cribEncoding = cribEncoding
340 exports.entry = entry
341 exports.feed = feed
342 exports.EVENTS = [
343 'data',
344 'drain',
345 'readable',
346 'end',
347 'entry',
348 'error',
349 'feed',
350 'finish'
351 ]
352}