UNPKG

6.99 kBJavaScriptView Raw
1'use strict'
2
3// pickup - transform RSS or Atom XML to JSON
4
5exports = module.exports = Pickup
6
7const attribute = require('./lib/attribute')
8const mappings = require('./lib/mappings')
9const os = require('os')
10const sax = require('saxes')
11const { Entry, Feed } = require('./lib/items')
12const { StringDecoder } = require('string_decoder')
13const { Transform } = require('readable-stream')
14const { debuglog, inherits } = require('util')
15
16const debug = debuglog('pickup')
17
// State bundles what the parser callbacks need to know between events.
//
// - entry     The entry currently being populated or null.
// - feed      The feed currently being populated or null.
// - image     Truthy while inside an image element.
// - map       Object with a `get(name)` method, translating tag names to keys.
// - name      The name of the current tag.
// - precedence Object with a `has(name)` method, naming tags allowed to
//             overwrite a value that has already been set.
function State (entry, feed, image, map, name, precedence) {
  Object.assign(this, { entry, feed, image, map, name, precedence })
}
26
// Sets the current tag name and, if a dedicated mapping exists for that
// name, switches to it. Without a dedicated mapping the current one stays.
//
// - name String? The tag name, undefined when a tag has just been closed.
State.prototype.setName = function (name) {
  // Tag names come from the document. Only own properties count, so that
  // names like `constructor` or `toString` cannot resolve to members
  // inherited from Object.prototype and end up as `this.map`.
  const isOwn = Object.prototype.hasOwnProperty.call(mappings, name)

  this.map = (isOwn && mappings[name]) || this.map
  this.name = name
}
31
// Returns what the current map yields for the current tag name, undefined
// if the map has nothing for it.
State.prototype.key = function () {
  const { map, name } = this

  return map.get(name)
}
35
// Returns true if the current tag name is a member of the precedence set.
State.prototype.takesPrecedence = function () {
  const { precedence, name } = this

  return precedence.has(name)
}
39
// Table of functions to apply when a tag opens, keyed by tag name. The
// `channel` and `feed` tags share a handler, as do `item` and `entry`.
//
// - t Object providing `feedopen`, `entryopen`, and `imageopen`.
function OpenHandlers (t) {
  const { feedopen, entryopen, imageopen } = t

  this.channel = feedopen
  this.feed = feedopen
  this.item = entryopen
  this.entry = entryopen
  this.image = imageopen
}
47
// Table of functions to apply when a tag closes, keyed by tag name. The
// `channel` and `feed` tags share a handler, as do `item` and `entry`.
//
// - t Object providing `feedclose`, `entryclose`, and `imageclose`.
function CloseHandlers (t) {
  const { feedclose, entryclose, imageclose } = t

  Object.assign(this, {
    channel: feedclose,
    feed: feedclose,
    item: entryclose,
    entry: entryclose,
    image: imageclose
  })
}
55
// Returns the encoding name to decode with, derived from a string that
// mentions a charset: 'binary' if it mentions ISO-8859-1, but not UTF-8,
// and 'utf8' in all other cases.
//
// - str String The text to search, matched case-insensitively.
function encodingFromString (str) {
  const mentions = (pattern) => str.match(pattern) !== null

  // UTF-8 is checked first, it wins if both are mentioned.
  if (!mentions(/utf-8/i) && mentions(/iso-8859-1/i)) {
    return 'binary'
  }

  return 'utf8'
}
64
// Returns the encoding selected by `opts.charset` or null if options are
// missing or `charset` is not a string.
//
// - opts Object? Options, of which only `charset` is read.
function encodingFromOpts (opts) {
  const charset = opts ? opts.charset : null

  return typeof charset === 'string' ? encodingFromString(charset) : null
}
70
// Pickup is a Transform stream that parses the RSS or Atom XML written to
// it. Callable with or without `new`.
//
// - opts Object? Passed on to Transform. Additionally read here:
//   - charset String? Names the input encoding, see encodingFromOpts.
//   - eventMode Boolean? Stored as `this.eventMode`.
//   - parser Object? Handed to the saxes parser as its options.
function Pickup (opts) {
  if (!(this instanceof Pickup)) return new Pickup(opts)
  Transform.call(this, opts)

  // The handler tables are shared by all instances and built only once.
  if (!Pickup.openHandlers) {
    Pickup.openHandlers = new OpenHandlers(Pickup.prototype)
    Pickup.closeHandlers = new CloseHandlers(Pickup.prototype)
  }

  this.encoding = encodingFromOpts(opts)
  this.decoder = new StringDecoder(this.encoding)

  this.eventMode = opts && opts.eventMode

  // No entry, no feed, not inside an image, an empty map, and no tag name
  // yet. Only these two tags may overwrite a value that is already set.
  const precedence = new Set(['content:encoded', 'pubDate'])

  this.state = new State(null, null, false, new Map(), '', precedence)

  const parser = new sax.SaxesParser(opts ? opts.parser : null)

  // Stores trimmed text under the key mapped to the current tag, on the
  // current entry or, if there is none, on the current feed.
  parser.ontext = (text) => {
    const trimmed = text.trim()

    if (trimmed.length === 0) {
      debug('discarding text: whitespace')
      return
    }

    const state = this.state
    const target = state.entry || state.feed

    if (!target || !state.map) return

    const mapped = state.key()

    if (mapped === undefined) return

    // Within an image element, the url is stored as the image.
    const key = state.image && state.name === 'url' ? 'image' : mapped

    if (target[key] !== undefined) {
      // Already set, only tags taking precedence may overwrite.
      if (!state.takesPrecedence()) return

      if (state.name === 'content:encoded' && trimmed.length > 4096) {
        debug('%s: discarding text: too long', state.name)
        return
      }

      debug('%s: taking precedence: ( %s, %s )', state.name, key, trimmed)
    }

    target[key] = trimmed
  }

  // CDATA and text share one handler, so they are not told apart and
  // whichever arrives first wins. This might be a mistake, but there is no
  // clear-cut alternative.

  parser.oncdata = parser.ontext

  parser.onerror = (er) => {
    debug('error: %s', er)

    this.emit('error', er)
  }

  // Applies the handler registered for the tag name, if there is one. Only
  // own properties of the table count.
  const dispatch = (name, handlers) => {
    if (!Object.prototype.hasOwnProperty.call(handlers, name)) return

    handlers[name].apply(this)
  }

  parser.onopentag = (node) => {
    const { name, attributes } = node

    this.state.setName(name)
    dispatch(name, Pickup.openHandlers)

    const target = this.state.entry || this.state.feed

    if (!target) return

    const key = this.state.key()

    if (!key) return

    if (Object.keys(attributes).length === 0) return

    // A key-value pair is expected if the attributes yield something.
    const kv = attribute(key, attributes, target)

    if (kv) {
      target[kv[0]] = kv[1]
    }
  }

  parser.onclosetag = (node) => {
    dispatch(node.name, Pickup.closeHandlers)

    // Leaves the state without a current tag name.
    this.state.setName()
  }

  this.parser = parser
}

inherits(Pickup, Transform)
180
// Returns the `objectMode` flag of the readable side of this stream.
Pickup.prototype.objectMode = function () {
  const { objectMode } = this._readableState

  return objectMode
}
184
// Begins a new feed, replacing the current feed of the state.
Pickup.prototype.feedopen = function () {
  const { state } = this

  state.feed = new Feed()
}
188
// Begins a new entry, replacing the current entry of the state.
Pickup.prototype.entryopen = function () {
  const { state } = this

  state.entry = new Entry()
}
192
// Marks the state as being inside an image element.
Pickup.prototype.imageopen = function () {
  const { state } = this

  state.image = true
}
196
// Releases the current entry, if there is one. In event mode, the entry is
// emitted as 'entry' event. Otherwise, it is pushed: as is in object mode,
// else as a line of JSON. The state is left without a current entry.
Pickup.prototype.entryclose = function () {
  const entry = this.state.entry

  if (!entry) return

  if (this.eventMode) {
    this.emit('entry', entry)
  } else {
    this.push(this.objectMode() ? entry : `${JSON.stringify(entry)}${os.EOL}`)
  }

  this.state.entry = null
}
214
// Releases the current feed, if there is one. In event mode, the feed is
// emitted as 'feed' event. Otherwise, it is pushed: as is in object mode,
// else as a line of JSON. The state is left without a current feed.
Pickup.prototype.feedclose = function () {
  const feed = this.state.feed

  if (!feed) return

  if (this.eventMode) {
    this.emit('feed', feed)
  } else {
    this.push(this.objectMode() ? feed : `${JSON.stringify(feed)}${os.EOL}`)
  }

  this.state.feed = null
}
232
// Marks the state as being outside of an image element.
Pickup.prototype.imageclose = function () {
  const { state } = this

  state.image = false
}
236
// Detaches all callbacks from the parser by setting them to null.
//
// - parser Object The parser to strip of its handlers.
function free (parser) {
  const handlers = [
    'oncdata',
    'onclosetag',
    'onerror',
    'onopentag',
    'onready',
    'ontext'
  ]

  for (const handler of handlers) {
    parser[handler] = null
  }
}
245
// Closes the parser, detaches its callbacks, and drops the references this
// stream holds to parser, decoder, encoding, and state.
//
// Safe to call more than once: both `_final` and `_destroy` end up here, and
// a repeated or reentrant call, with the parser already gone, does nothing.
Pickup.prototype.invalidate = function () {
  const parser = this.parser

  if (!parser) return

  debug('invalidating')

  // Dropping our reference first, so a call made while the parser is
  // closing returns early instead of closing it again.
  this.parser = null

  parser.close()
  free(parser)

  // The decoder may live under either name, release both.
  this._decoder = null
  this.decoder = null
  this.encoding = null
  this.state = null
}
256
// Invalidates this stream, before handing over to the `_destroy`
// implementation of Transform.
//
// - er Error? Passed through to Transform.
// - cb Function Passed through to Transform.
Pickup.prototype._destroy = function (er, cb) {
  this.invalidate()

  const destroy = Transform.prototype._destroy

  destroy.call(this, er, cb)
}
261
// Invalidates this stream and then signals completion.
//
// - cb Function Called without arguments, after invalidating.
Pickup.prototype._final = function (cb) {
  this.invalidate()

  cb()
}
266
// Guesses the encoding from the beginning of an XML document, returning
// 'utf8' unless the word `encoding` is found, directly followed, ignoring
// whitespace, by an equals sign. In that case, the text after the word is
// left to encodingFromString.
//
// - str String The first characters of the document.
function cribEncoding (str) {
  // Only the text between the first and a second `encoding` is looked at.
  const [, rest] = str.split('encoding')

  if (rest && rest.trim().startsWith('=')) {
    return encodingFromString(rest)
  }

  return 'utf8'
}
279
// Decodes the chunk and writes the resulting string to the parser.
//
// With the first chunk, the encoding is settled: if none has been set, it is
// cribbed from the beginning of that chunk. The decoder is created for the
// settled encoding and 'encoding' is emitted, both exactly once.
//
// - chunk Buffer The data to transform.
// - enc String Not used.
// - cb Function Called once the chunk has been written to the parser.
Pickup.prototype._transform = function (chunk, enc, cb) {
  if (!this._decoder) {
    if (!this.encoding) {
      // This, of course, fails--yielding 'utf8'--if the first chunk is too
      // short to contain the encoding tag.
      const head = chunk.toString('ascii', 0, 128)

      this.encoding = cribEncoding(head)
    }

    // A decoder created before the encoding was known would decode as UTF-8,
    // no matter what has been detected. Remembering the decoder here, keeps
    // this branch from running--and emitting--again with every chunk.
    this._decoder = new StringDecoder(this.encoding)
    this.decoder = this._decoder

    this.emit('encoding', this.encoding)
  }

  const str = this._decoder.write(chunk)

  if (this.parser.closed) {
    // Deferring the write until the parser reports being ready.
    this.parser.onready = () => {
      this.parser.write(str)
      cb()
    }

    return
  }

  this.parser.write(str)

  cb()
}
308
309// Exports
310
// The constructors, exposed as properties of the exported function.
Object.assign(exports, { Pickup, Feed, Entry })
314
315// Extending surface area when testing.
316
317if (process.mainModule.filename.match(/test/) !== null) {
318 exports.extend = function (origin, add) {
319 return Object.assign(origin, add || Object.create(null))
320 }
321
322 exports.entry = function (obj) {
323 return exports.extend(new Entry(), obj)
324 }
325
326 exports.feed = function (obj) {
327 return exports.extend(new Feed(), obj)
328 }
329
330 exports.cribEncoding = cribEncoding
331
332 exports.EVENTS = [
333 'data',
334 'drain',
335 'readable',
336 'end',
337 'entry',
338 'error',
339 'feed',
340 'finish'
341 ]
342}