1 | 'use strict'
|
2 |
|
3 |
|
4 |
|
5 | exports = module.exports = Pickup
|
6 |
|
7 | const attribute = require('./lib/attribute')
|
8 | const mappings = require('./lib/mappings')
|
9 | const os = require('os')
|
10 | const sax = require('saxes')
|
11 | const { Entry, Feed } = require('./lib/items')
|
12 | const { StringDecoder } = require('string_decoder')
|
13 | const { Transform } = require('readable-stream')
|
14 | const { debuglog, inherits } = require('util')
|
15 |
|
16 | const debug = debuglog('pickup')
|
17 |
|
/**
 * Mutable parsing state shared by the SAX event handlers.
 *
 * @param {Object|null} entry - Entry currently being populated, if any.
 * @param {Object|null} feed - Feed currently being populated, if any.
 * @param {boolean} image - Whether the parser is inside an image element.
 * @param {Map} map - Maps element names to property keys (read via `get`).
 * @param {string} name - Name of the element currently being parsed.
 * @param {Set} precedence - Element names allowed to overwrite a property
 *   that has already been set (read via `has`).
 */
function State (entry, feed, image, map, name, precedence) {
  this.entry = entry
  this.feed = feed
  this.image = image
  this.map = map
  this.name = name
  this.precedence = precedence
}

/**
 * Records the current element name and, when a mapping is registered under
 * exactly that name, switches to that mapping.
 *
 * @param {string} [name] - Element name; `undefined` clears the current name.
 */
State.prototype.setName = function (name) {
  // Element names come straight from the parsed document. Only own
  // properties of `mappings` are considered, so a tag such as <constructor>
  // or <toString> can never resolve to a member inherited from
  // Object.prototype and replace the map with something that has no `get`.
  if (Object.prototype.hasOwnProperty.call(mappings, name)) {
    this.map = mappings[name] || this.map
  }
  this.name = name
}

/**
 * Looks up the property key for the current element name in the active map.
 *
 * @returns {*} The mapped key, or `undefined` if the name is not mapped.
 */
State.prototype.key = function () {
  return this.map.get(this.name)
}

/**
 * Tells whether the current element may overwrite an already set property.
 *
 * @returns {boolean} `true` if the current name is in the precedence set.
 */
State.prototype.takesPrecedence = function () {
  return this.precedence.has(this.name)
}
|
39 |
|
/**
 * Dispatch table from element names to the handlers run on their opening
 * tags. `channel`/`feed` share one handler, as do `item`/`entry`.
 *
 * @param {Object} t - Object providing `feedopen`, `entryopen`, `imageopen`.
 */
function OpenHandlers (t) {
  const { feedopen, entryopen, imageopen } = t

  this.channel = feedopen
  this.feed = feedopen
  this.item = entryopen
  this.entry = entryopen
  this.image = imageopen
}
|
47 |
|
/**
 * Dispatch table from element names to the handlers run on their closing
 * tags. `channel`/`feed` share one handler, as do `item`/`entry`.
 *
 * @param {Object} t - Object providing `feedclose`, `entryclose`, `imageclose`.
 */
function CloseHandlers (t) {
  const { feedclose, entryclose, imageclose } = t

  this.channel = feedclose
  this.feed = feedclose
  this.item = entryclose
  this.entry = entryclose
  this.image = imageclose
}
|
55 |
|
/**
 * Derives a decoder encoding name from free-form text.
 *
 * @param {string} str - Text that may mention an encoding.
 * @returns {string} `'binary'` when the text mentions ISO-8859-1 and does not
 *   mention UTF-8 (both case-insensitive); `'utf8'` in every other case.
 */
function encodingFromString (str) {
  // UTF-8 is tested first, so it wins when both names appear.
  const isLatin1 = !str.match(/utf-8/i) && str.match(/iso-8859-1/i)

  return isLatin1 ? 'binary' : 'utf8'
}
|
64 |
|
/**
 * Reads the encoding requested through the `charset` option.
 *
 * @param {Object} [opts] - Options object; only `charset` is inspected.
 * @returns {string|null} The result of `encodingFromString` for a string
 *   `charset`, or `null` when there are no options or `charset` is not a
 *   string.
 */
function encodingFromOpts (opts) {
  const charset = opts ? opts.charset : null

  return typeof charset === 'string' ? encodingFromString(charset) : null
}
|
70 |
|
/**
 * Transform stream that feeds incoming XML to a saxes parser and collects
 * feed and entry objects from the resulting SAX events.
 *
 * May be called with or without `new`.
 *
 * @param {Object} [opts] - Passed on to `Transform`. Additionally read:
 *   `charset` (string naming the input encoding), `eventMode` (emit
 *   `'feed'`/`'entry'` events instead of pushing data) and `parser` (options
 *   handed to `SaxesParser`).
 */
function Pickup (opts) {
  if (!(this instanceof Pickup)) return new Pickup(opts)
  Transform.call(this, opts)

  // The dispatch tables are shared by all instances and built only once.
  if (!Pickup.openHandlers) {
    Pickup.openHandlers = new OpenHandlers(Pickup.prototype)
    Pickup.closeHandlers = new CloseHandlers(Pickup.prototype)
  }

  this.encoding = encodingFromOpts(opts)
  this.decoder = new StringDecoder(this.encoding)

  this.eventMode = opts && opts.eventMode

  const precedence = new Set(['content:encoded', 'pubDate'])
  this.state = new State(null, null, false, new Map(), '', precedence)

  const parser = new sax.SaxesParser(opts ? opts.parser : null)

  // Stores trimmed text under the key mapped to the current element.
  parser.ontext = (text) => {
    const trimmed = text.trim()

    if (trimmed.length === 0) {
      debug('discarding text: whitespace')
      return
    }

    const state = this.state
    const target = state.entry || state.feed

    if (!target || !state.map) return

    const mapped = state.key()

    if (mapped === undefined) return

    // A `url` element inside an image element populates `image`.
    const key = state.image && state.name === 'url' ? 'image' : mapped

    if (target[key] !== undefined) {
      // Already set: only elements taking precedence may overwrite.
      if (!state.takesPrecedence()) return

      if (state.name === 'content:encoded' && trimmed.length > 4096) {
        debug('%s: discarding text: too long', state.name)
        return
      }

      debug('%s: taking precedence: ( %s, %s )', state.name, key, trimmed)
    }

    target[key] = trimmed
  }

  // CDATA sections are handled exactly like plain text.
  parser.oncdata = parser.ontext

  parser.onerror = (er) => {
    debug('error: %s', er)

    this.emit('error', er)
  }

  // Runs the handler registered under `name`, if any, with this stream as
  // the receiver.
  const dispatch = (name, table) => {
    if (!Object.prototype.hasOwnProperty.call(table, name)) return

    table[name].apply(this)
  }

  parser.onopentag = (node) => {
    this.state.setName(node.name)
    dispatch(node.name, Pickup.openHandlers)

    const target = this.state.entry || this.state.feed

    if (!target) return

    const key = this.state.key(node.name)

    if (!key) return

    if (Object.keys(node.attributes).length === 0) return

    // `attribute` may yield a [key, value] pair derived from the attributes.
    const kv = attribute(key, node.attributes, target)

    if (kv) {
      target[kv[0]] = kv[1]
    }
  }

  parser.onclosetag = (node) => {
    dispatch(node.name, Pickup.closeHandlers)
    this.state.setName()
  }

  this.parser = parser
}

inherits(Pickup, Transform)
|
180 |
|
/**
 * Reports the `objectMode` flag of the readable side of this stream.
 *
 * @returns {boolean} The value of `_readableState.objectMode`.
 */
Pickup.prototype.objectMode = function () {
  const { objectMode } = this._readableState

  return objectMode
}
|
184 |
|
/**
 * Open-tag handler for `channel`/`feed`: starts collecting a new feed.
 */
Pickup.prototype.feedopen = function () {
  const { state } = this

  state.feed = new Feed()
}
|
188 |
|
/**
 * Open-tag handler for `item`/`entry`: starts collecting a new entry.
 */
Pickup.prototype.entryopen = function () {
  const { state } = this

  state.entry = new Entry()
}
|
192 |
|
/**
 * Open-tag handler for `image`: marks that the parser is inside an image
 * element.
 */
Pickup.prototype.imageopen = function () {
  const { state } = this

  state.image = true
}
|
196 |
|
/**
 * Close-tag handler for `item`/`entry`: hands the collected entry over and
 * clears it from the state. Does nothing when no entry is being collected.
 *
 * In event mode the entry is emitted as an `'entry'` event. Otherwise it is
 * pushed downstream, as the object itself in object mode or as one line of
 * JSON terminated by `os.EOL`.
 */
Pickup.prototype.entryclose = function () {
  const { entry } = this.state

  if (!entry) return

  if (this.eventMode) {
    this.emit('entry', entry)
  } else {
    const chunk = this.objectMode() ? entry : JSON.stringify(entry) + os.EOL

    this.push(chunk)
  }

  this.state.entry = null
}
|
214 |
|
/**
 * Close-tag handler for `channel`/`feed`: hands the collected feed over and
 * clears it from the state. Does nothing when no feed is being collected.
 *
 * In event mode the feed is emitted as a `'feed'` event. Otherwise it is
 * pushed downstream, as the object itself in object mode or as one line of
 * JSON terminated by `os.EOL`.
 */
Pickup.prototype.feedclose = function () {
  const { feed } = this.state

  if (!feed) return

  if (this.eventMode) {
    this.emit('feed', feed)
  } else {
    const chunk = this.objectMode() ? feed : JSON.stringify(feed) + os.EOL

    this.push(chunk)
  }

  this.state.feed = null
}
|
232 |
|
/**
 * Close-tag handler for `image`: marks that the parser has left the image
 * element.
 */
Pickup.prototype.imageclose = function () {
  const { state } = this

  state.image = false
}
|
236 |
|
/**
 * Detaches all event handlers from a parser by setting them to `null`.
 *
 * @param {Object} parser - Parser whose handler properties are cleared.
 */
function free (parser) {
  const handlerNames = [
    'oncdata',
    'onclosetag',
    'onerror',
    'onopentag',
    'onready',
    'ontext'
  ]

  for (const handlerName of handlerNames) {
    parser[handlerName] = null
  }
}
|
245 |
|
/**
 * Closes the parser, detaches its handlers and drops all parsing state.
 *
 * Safe to call more than once: both `_final` and `_destroy` invalidate the
 * stream, so a stream that is destroyed after it has finished reaches this
 * method twice. Without the guard the second call would dereference the
 * already released parser and throw.
 */
Pickup.prototype.invalidate = function () {
  if (!this.parser) {
    debug('already invalidated')
    return
  }

  debug('invalidating')

  // Close before detaching, so handlers still see what closing reports.
  this.parser.close()
  free(this.parser)

  this._decoder = null
  this.encoding = null
  this.parser = null
  this.state = null
}
|
256 |
|
/**
 * Stream destroy hook: releases parser resources, then defers to the
 * `Transform` implementation.
 *
 * @param {Error|null} er - Error the stream is being destroyed with, if any.
 * @param {Function} cb - Completion callback, passed on to `Transform`.
 */
Pickup.prototype._destroy = function (er, cb) {
  this.invalidate()

  const destroyTransform = Transform.prototype._destroy

  destroyTransform.call(this, er, cb)
}
|
261 |
|
/**
 * Stream final hook: releases parser resources, then signals completion.
 *
 * @param {Function} cb - Completion callback, invoked without arguments.
 */
Pickup.prototype._final = function (cb) {
  // Release the parser first; only then report that finalizing is done.
  this.invalidate()

  cb()
}
|
266 |
|
/**
 * Guesses the encoding from the leading text of an XML document by looking
 * for an `encoding=...` declaration.
 *
 * @param {string} str - Leading text of the document.
 * @returns {string} The result of `encodingFromString` for the declared
 *   value, or `'utf8'` when no declaration is found.
 */
function cribEncoding (str) {
  const def = 'utf8'
  const rest = str.split('encoding')[1]

  if (!rest) return def

  const decl = rest.trim()

  // The word "encoding" only counts when it is followed by an assignment.
  if (decl[0] !== '=') return def

  // Inspect only the quoted value itself. Searching everything after the
  // declaration would let an unrelated "utf-8" further on (in a comment or
  // a stylesheet instruction, say) override a declared ISO-8859-1.
  const quoted = /^=\s*(["'])(.*?)\1/.exec(decl)

  // Unquoted or truncated declarations fall back to searching the remainder.
  return encodingFromString(quoted ? quoted[2] : decl)
}
|
279 |
|
/**
 * Decodes a chunk and feeds the resulting text to the parser.
 *
 * On the first chunk the encoding is resolved, either from the options or by
 * sniffing the XML declaration, the decoder for that encoding is created and
 * an `'encoding'` event is emitted once. Creating the decoder only here
 * ensures that a sniffed encoding is the one actually used for decoding.
 *
 * @param {Buffer|string} chunk - Piece of the XML document.
 * @param {string} enc - Chunk encoding supplied by the stream; not used.
 * @param {Function} cb - Called once the chunk has been given to the parser.
 */
Pickup.prototype._transform = function (chunk, enc, cb) {
  if (!this._decoder) {
    if (!this.encoding) {
      // Only the first 128 bytes are inspected, so a declaration that is
      // split across chunks is not detected.
      const head = chunk.toString('ascii', 0, 128)

      this.encoding = cribEncoding(head)
    }

    this._decoder = new StringDecoder(this.encoding)
    this.emit('encoding', this.encoding)
  }

  const str = this._decoder.write(chunk)

  if (this.parser.closed) {
    // Wait until the parser reports it is ready before writing to it.
    this.parser.onready = () => {
      this.parser.write(str)
      cb()
    }

    return
  }

  this.parser.write(str)

  cb()
}
|
308 |
|
309 |
|
310 |
|
311 | exports.Pickup = Pickup
|
312 | exports.Feed = Feed
|
313 | exports.Entry = Entry
|
314 |
|
315 |
|
316 |
|
317 | if (process.mainModule.filename.match(/test/) !== null) {
|
318 | exports.extend = function (origin, add) {
|
319 | return Object.assign(origin, add || Object.create(null))
|
320 | }
|
321 |
|
322 | exports.entry = function (obj) {
|
323 | return exports.extend(new Entry(), obj)
|
324 | }
|
325 |
|
326 | exports.feed = function (obj) {
|
327 | return exports.extend(new Feed(), obj)
|
328 | }
|
329 |
|
330 | exports.cribEncoding = cribEncoding
|
331 |
|
332 | exports.EVENTS = [
|
333 | 'data',
|
334 | 'drain',
|
335 | 'readable',
|
336 | 'end',
|
337 | 'entry',
|
338 | 'error',
|
339 | 'feed',
|
340 | 'finish'
|
341 | ]
|
342 | }
|