1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 | var sax = require('sax')
|
13 | , addressparser = require('addressparser')
|
14 | , indexOfObject = require('array-indexofobject')
|
15 | , util = require('util')
|
16 | , TransformStream = require('readable-stream').Transform
|
17 | , _ = require('../utils');
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
57 |
|
58 |
|
59 |
|
60 |
|
/**
 * FeedParser constructor.
 *
 * Sets this object up as a Transform stream whose readable side is switched
 * to object mode, resets the parsing state through `init()`, fills in the
 * option defaults and wires a sax stream's events to the handler methods of
 * this instance. Works with or without `new`.
 *
 * @param {Object} [options]
 * @param {boolean} [options.strict=false] First argument given to `sax.createStream`.
 * @param {boolean} [options.normalize=true]
 * @param {boolean} [options.addmeta=true]
 * @param {boolean} [options.resume_saxerror=true]
 * @param {number} [options.MAX_BUFFER_LENGTH] Written to `sax.MAX_BUFFER_LENGTH`
 *   (a property of the shared sax module); 16 * 1024 * 1024 when omitted.
 * @param {string} [options.feedurl] When set, becomes the first entry of the
 *   xml:base stack.
 */
function FeedParser (options) {
  if (!(this instanceof FeedParser)) {
    return new FeedParser(options);
  }

  TransformStream.call(this);
  this._readableState.objectMode = true;
  this._readableState.highWaterMark = 16;

  // init() creates this.xmlbase, which the feedurl handling below relies on.
  this.init();

  var settings = _.assign({}, options)
    , fallbacks = { strict: false, normalize: true, addmeta: true, resume_saxerror: true }
    ;
  Object.keys(fallbacks).forEach(function (key) {
    if (!(key in settings)) settings[key] = fallbacks[key];
  });
  this.options = settings;

  sax.MAX_BUFFER_LENGTH = ('MAX_BUFFER_LENGTH' in settings)
    ? settings.MAX_BUFFER_LENGTH
    : 16 * 1024 * 1024;

  if (settings.feedurl) {
    this.xmlbase.unshift({ '#name': 'xml', '#': settings.feedurl });
  }

  var saxStream = sax.createStream(settings.strict, { lowercase: true, xmlns: true });
  this.stream = saxStream;

  // [event name, handler] pairs, registered in this order. Text and CDATA
  // share one handler but each gets its own bound function.
  [
    ['error', this.handleSaxError],
    ['processinginstruction', this.handleProcessingInstruction],
    ['opentag', this.handleOpenTag],
    ['closetag', this.handleCloseTag],
    ['text', this.handleText],
    ['cdata', this.handleText],
    ['end', this.handleEnd]
  ].forEach(function (pair) {
    saxStream.on(pair[0], pair[1].bind(this));
  }, this);
}
util.inherits(FeedParser, TransformStream);
|
93 |
|
94 |
|
95 |
|
96 |
|
97 |
|
98 |
|
/**
 * Resets every piece of per-feed parsing state on this instance to a fresh,
 * empty value: collected meta, the "meta emitted" flag, the element stack,
 * the xml:base stack, the XHTML capture state and the error list.
 */
FeedParser.prototype.init = function () {
  // Built anew on every call so no arrays/objects are shared between resets.
  var blankState = {
    meta: {
      '#ns': [],
      '@': [],
      '#xml': {}
    },
    _emitted_meta: false,
    stack: [],
    xmlbase: [],
    in_xhtml: false,
    xhtml: {},
    errors: []
  };

  Object.keys(blankState).forEach(function (field) {
    this[field] = blankState[field];
  }, this);
};
|
114 |
|
/**
 * Handler registered for the sax stream's 'end' event.
 *
 * If no feed type was recorded in `this.meta['#type']`, reports a
 * 'Not a feed' error through `handleError` and returns its result;
 * otherwise signals the end of the readable side with `push(null)`.
 */
FeedParser.prototype.handleEnd = function () {
  var sawFeedRoot = Boolean(this.meta && this.meta['#type']);

  if (sawFeedRoot) {
    this.push(null);
    return;
  }

  return this.handleError(new Error('Not a feed'));
};
|
124 |
|
/**
 * Handler registered for the sax stream's 'error' event.
 *
 * Re-emits the error on this stream, then asks the sax parser to carry on
 * when the `resume_saxerror` option is truthy.
 *
 * @param {Error} e The error reported by the sax stream.
 */
FeedParser.prototype.handleSaxError = function (e) {
  this.emit('error', e);

  if (!this.options.resume_saxerror) return;
  this.resumeSaxError();
};
|
131 |
|
/**
 * Clears the error recorded on the sax stream's internal parser
 * (`this.stream._parser`) and calls its `resume()`. Does nothing when the
 * stream has no `_parser`.
 */
FeedParser.prototype.resumeSaxError = function () {
  var saxParser = this.stream._parser;
  if (!saxParser) return;

  saxParser.error = null;
  saxParser.resume();
};
|
138 |
|
/**
 * Reports a parser-level error by emitting it as an 'error' event on this
 * stream.
 *
 * @param {Error} e The error to emit.
 */
FeedParser.prototype.handleError = function (e) {
  var eventName = 'error';
  this.emit(eventName, e);
};
|
142 |
|
143 |
|
144 |
|
/**
 * Handler registered for the sax stream's 'processinginstruction' event.
 *
 * For the `<?xml ... ?>` declaration, splits the instruction body on
 * whitespace into `name=value` tokens and stores each value, minus its first
 * and last character (the quotes), in `this.meta['#xml']`. Other processing
 * instructions are ignored.
 *
 * Each token is split at its FIRST '=' only, so a value that itself contains
 * '=' is kept whole instead of being cut at the second '='.
 *
 * As before, a raw value of two characters or fewer (e.g. `""`) is stored as
 * the falsy result of the length check rather than as a stripped string.
 *
 * @param {Object} node
 * @param {string} node.name Target of the processing instruction.
 * @param {string} node.body Text following the target.
 */
FeedParser.prototype.handleProcessingInstruction = function (node) {
  if (node.name !== 'xml') return;

  this.meta['#xml'] = node.body.trim().split(/\s+/).reduce(function (map, attr) {
    var eq = attr.indexOf('=');
    if (eq >= 0) {
      var key = attr.slice(0, eq)
        , raw = attr.slice(eq + 1)
        ;
      // /^.(.*?).$/ drops the surrounding quote characters.
      map[key] = raw && raw.length > 2 && raw.match(/^.(.*?).$/)[1];
    }
    return map;
  }, this.meta['#xml']);
};
|
156 |
|
/**
 * Handler registered for the sax stream's 'opentag' event.
 *
 * Builds the internal representation of the element (`#name`, `#prefix`,
 * `#local`, `#uri`, attributes under `@`, text under `#`) and puts it on top
 * of `this.stack`. Additionally:
 *
 *  - while XHTML content is being captured and this tag is not the capturing
 *    element itself, the tag is appended as markup to `this.xhtml['#']`;
 *  - otherwise, if the stack is empty and the tag is an `rss`, RDF `rdf` or
 *    Atom `feed` root, its attributes (except `version`) are recorded in
 *    `this.meta['@']` and `#type` / `#version` are set on `this.meta`.
 *
 * @param {Object} node Tag object with `name`, `prefix`, `local`, `uri` and
 *   `attributes` properties.
 */
FeedParser.prototype.handleOpenTag = function (node) {
  var n = {
    '#name': node.name,
    '#prefix': node.prefix,
    '#local': node.local,
    '#uri': node.uri,
    '@': {},
    '#': ''
  };
  var tagName = n['#name'];

  if (Object.keys(node.attributes).length) {
    // May also switch on XHTML capture or push an xml:base entry.
    n['@'] = this.handleAttributes(node.attributes, tagName);
  }

  var attrNames = Object.keys(n['@']);

  // Evaluated lazily so the namespace lookups only run for a root candidate.
  var isFeedRoot = function () {
    return n['#name'] === 'rss' ||
      (n['#local'] === 'rdf' && _.nslookup([n['#uri']], 'rdf')) ||
      (n['#local'] === 'feed' && _.nslookup([n['#uri']], 'atom'));
  };

  if (this.in_xhtml && this.xhtml['#name'] != tagName) {
    var markup = attrNames.reduce(function (soFar, name) {
      return soFar + ' ' + name + '="' + n['@'][name] + '"';
    }, '<' + tagName);
    this.xhtml['#'] += markup + '>';
  } else if (this.stack.length === 0 && isFeedRoot()) {
    attrNames.forEach(function (name) {
      if (name == 'version') return;
      var entry = {};
      entry[name] = n['@'][name];
      this.meta['@'].push(entry);
    }, this);

    var local = n['#local']
      , version = n['@']['version']
      ;
    if (local === 'rss') {
      this.meta['#type'] = 'rss';
      this.meta['#version'] = version;
    } else if (local === 'rdf') {
      this.meta['#type'] = 'rdf';
      this.meta['#version'] = version || '1.0';
    } else if (local === 'feed') {
      this.meta['#type'] = 'atom';
      this.meta['#version'] = version || '1.0';
    }
  }

  this.stack.unshift(n);
};
|
205 |
|
/**
 * Handler registered for the sax stream's 'closetag' event.
 *
 * Pops the element being closed off `this.stack`, then:
 *
 *  1. works out the element's prefix / local name / type from its qualified
 *     name and namespace URI;
 *  2. resolves the text of `link` elements (and Atom `logo` / `icon`)
 *     against the current xml:base, if any;
 *  3. pops the xml:base entry that was pushed by this element;
 *  4. finishes or continues XHTML capture;
 *  5. trims the element text, dropping it when blank and collapsing the node
 *     to a plain string when the text is its only property;
 *  6. for `item` / `entry` elements builds and pushes an item (extracting and
 *     emitting meta first if no title is known yet); for `channel` / `feed`
 *     elements extracts and emits meta if that has not happened yet;
 *  7. attaches the node to its parent (the new top of the stack), turning
 *     repeated children into an array.
 *
 * Fix over the previous revision: the xml:base entry is matched against the
 * element's qualified name. It used to be compared with the result of
 * `el.split(':')`, which only coerces back to the name for unprefixed tags,
 * so an xml:base declared on a prefixed element was never popped.
 *
 * @param {string} el Qualified name of the element being closed.
 */
FeedParser.prototype.handleCloseTag = function (el) {
  var node = {
        '#name': el,
        '#prefix': '',
        '#local': ''
      }
    , nameParts = el.split(':')
    , stdEl
    , item
    , baseurl
    ;
  var n = this.stack.shift();

  if (nameParts.length > 1 && nameParts[0] === n['#prefix']) {
    if (_.nslookup(n['#uri'], 'atom')) {
      node['#prefix'] = nameParts[0];
      node['#local'] = nameParts.slice(1).join(':');
      node['#type'] = 'atom';
    } else if (_.nslookup(n['#uri'], 'rdf')) {
      node['#prefix'] = nameParts[0];
      node['#local'] = nameParts.slice(1).join(':');
      node['#type'] = 'rdf';
    } else {
      node['#prefix'] = _.nsprefix(n['#uri']) || n['#prefix'];
      node['#local'] = nameParts.slice(1).join(':');
    }
  } else {
    node['#local'] = node['#name'];
    node['#type'] = _.nsprefix(n['#uri']) || n['#prefix'];
  }

  // The bookkeeping keys must not leak into the parsed result.
  delete n['#name'];
  delete n['#local'];
  delete n['#prefix'];
  delete n['#uri'];

  if (this.xmlbase && this.xmlbase.length) {
    baseurl = this.xmlbase[0]['#'];
  }

  var mayHaveResolvableUrl = (
    (
      (node['#local'] === 'logo' || node['#local'] === 'icon') && node['#type'] === 'atom'
    ) ||
    (
      node['#local'] === 'link'
    )
  );
  if (baseurl && mayHaveResolvableUrl) {
    n['#'] = _.resolve(baseurl, n['#']);
  }

  // Leaving the element that declared the current xml:base: drop that entry.
  if (this.xmlbase.length && (node['#name'] == this.xmlbase[0]['#name'])) {
    void this.xmlbase.shift();
  }

  if (this.in_xhtml) {
    if (node['#name'] == this.xhtml['#name']) {
      // End of the capturing element: its text becomes the captured markup
      // and every parsed child is discarded.
      n['#'] += this.xhtml['#'].trim();
      for (var key in n) {
        if (key != '@' && key != '#') {
          delete n[key];
        }
      }
      this.xhtml = {};
      this.in_xhtml = false;
    } else {
      this.xhtml['#'] += '</' + node['#name'] + '>';
    }
  }

  if ('#' in n) {
    if (n['#'].match(/^\s*$/)) {
      delete n['#'];
    } else {
      n['#'] = n['#'].trim();
      if (Object.keys(n).length === 1) {
        // Text is the only thing left: represent the node as a string.
        n = n['#'];
      }
    }
  }

  if (node['#name'] === 'item' ||
      node['#name'] === 'entry' ||
      (node['#local'] === 'item' && (node['#prefix'] === '' || node['#type'] === 'rdf')) ||
      (node['#local'] == 'entry' && (node['#prefix'] === '' || node['#type'] === 'atom'))) {

    if (!this.meta.title) {
      // No feed title yet: take meta from what the parent has collected so far.
      _.assign(this.meta, this.handleMeta(this.stack[0], this.meta['#type'], this.options));
      if (!this._emitted_meta) {
        this.emit('meta', this.meta);
        this._emitted_meta = true;
      }
    }
    if (!baseurl && this.xmlbase && this.xmlbase.length) {
      // handleMeta may just have established a base URL.
      n = _.reresolve(n, this.xmlbase[0]['#']);
    }
    item = this.handleItem(n, this.meta['#type'], this.options);
    if (this.options.addmeta) {
      item.meta = this.meta;
    }
    if (this.meta.author && !item.author) item.author = this.meta.author;
    this.push(item);
  } else if (!this.meta.title &&
             (node['#name'] === 'channel' ||
              node['#name'] === 'feed' ||
              (node['#local'] === 'channel' && (node['#prefix'] === '' || node['#type'] === 'rdf')) ||
              (node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom')) ) ) {
    _.assign(this.meta, this.handleMeta(n, this.meta['#type'], this.options));
    if (!this._emitted_meta) {
      this.emit('meta', this.meta);
      this._emitted_meta = true;
    }
  }

  if (this.stack.length > 0) {
    // Pick the key under which this node is stored on its parent.
    if (node['#prefix'] && node['#local'] && !node['#type']) {
      stdEl = node['#prefix'] + ':' + node['#local'];
    } else if (node['#name'] && node['#type'] && node['#type'] !== this.meta['#type']) {
      stdEl = node['#name'];
    } else {
      stdEl = node['#local'] || node['#name'];
    }
    if (!this.stack[0].hasOwnProperty(stdEl)) {
      this.stack[0][stdEl] = n;
    } else if (this.stack[0][stdEl] instanceof Array) {
      this.stack[0][stdEl].push(n);
    } else {
      this.stack[0][stdEl] = [this.stack[0][stdEl], n];
    }
  }
};
|
344 |
|
/**
 * Handler registered for the sax stream's 'text' and 'cdata' events.
 *
 * While XHTML content is being captured the text is appended to the capture
 * buffer; otherwise it is appended to (or becomes) the `#` text of the
 * element on top of the stack. Text arriving with an empty stack is dropped.
 *
 * @param {string} text
 */
FeedParser.prototype.handleText = function (text) {
  if (this.in_xhtml) {
    this.xhtml['#'] += text;
    return;
  }

  if (!this.stack.length) return;

  var current = this.stack[0];
  if (current && '#' in current) {
    current['#'] += text;
  } else {
    current['#'] = text;
  }
};
|
358 |
|
/**
 * Converts the attribute objects of one element into a flat
 * `{ name: trimmedValue }` map and applies the attributes' side effects:
 *
 *  - `xmlns` declarations are recorded in `this.meta['#ns']`;
 *  - `href`, `src` and `uri` values are resolved against the xml:base that
 *    was current when the element was opened (this also overwrites the
 *    `value` of the incoming attribute object);
 *  - an `xml:base` attribute is resolved the same way and pushed onto
 *    `this.xmlbase`, tagged with the element name;
 *  - `type="xhtml"` switches on XHTML capture for the element.
 *
 * The key of each entry is the attribute's local name, prefixed with
 * `<prefix>:` when the attribute is in the xml namespace or when its
 * namespace URI does not match its own prefix according to `_.nslookup`.
 *
 * @param {Object} attrs Attribute objects keyed by name; each is read for
 *   `name`, `value`, `prefix`, `local` and `uri`.
 * @param {string} el Name of the element that owns the attributes.
 * @returns {Object} The simplified attribute map.
 */
FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) {
  var base = (this.xmlbase && this.xmlbase.length) ? this.xmlbase[0]['#'] : ''
    , result = {}
    , names = Object.keys(attrs)
    , i
    , attr
    , qualifier
    , nsEntry
    , isUrlAttr
    ;

  for (i = 0; i < names.length; i++) {
    attr = attrs[names[i]];
    qualifier = '';

    if (attr.prefix === 'xmlns') {
      nsEntry = {};
      nsEntry[attr.name] = attr.value;
      this.meta['#ns'].push(nsEntry);
    }

    if ((attr.uri && attr.prefix && !_.nslookup(attr.uri, attr.prefix)) || _.nslookup(attr.uri, 'xml')) {
      qualifier = (_.nsprefix(attr.uri) || attr.prefix) + (attr.local ? ':' : '');
    }

    isUrlAttr = attr.local == 'href' || attr.local == 'src' || attr.local == 'uri';

    if (base && isUrlAttr) {
      attr.value = _.resolve(base, attr.value);
    } else if (attr.local === 'base' && _.nslookup(attr.uri, 'xml')) {
      // A relative xml:base is itself relative to the enclosing base.
      if (base) {
        attr.value = _.resolve(base, attr.value);
      }
      this.xmlbase.unshift({ '#name': el, '#': attr.value });
    } else if (attr.name === 'type' && attr.value === 'xhtml') {
      this.in_xhtml = true;
      this.xhtml = { '#name': el, '#': '' };
    }

    result[qualifier + attr.local] = attr.value ? attr.value.trim() : '';
  }

  return result;
};
|
411 |
|
/**
 * Builds the feed-level meta object from a parsed `channel` / `feed` node.
 *
 * Every child of `node` whose key does not start with '#' is copied onto the
 * result, under its own name when it contains ':' and under
 * `<type>:<name>` otherwise. When normalization is on (no `options`, or a
 * truthy `options.normalize`), the well-known properties `title`,
 * `description`, `date`, `pubdate`/`pubDate`, `link`, `xmlurl`/`xmlUrl`,
 * `author`, `language`, `favicon`, `copyright`, `generator`, `cloud`,
 * `image` and `categories` are additionally derived from those children.
 *
 * Side effects: when no xml:base is known yet, the first absolute
 * self/alternate link found becomes the base (`this.xmlbase`) and
 * `this.stack[0]` is re-resolved against it. Reads `this.options.feedurl`
 * as the fallback for `xmlurl`.
 *
 * Fixes over the previous revision:
 *  - a lone `itunes:category` with a single sub-category produced
 *    "Sub/Sub" instead of "Parent/Sub";
 *  - a lone `media:category` was pushed untrimmed;
 *  - a `managingeditor` / `webmaster` value for which `addressparser`
 *    yields nothing threw a TypeError; the raw text is used instead;
 *  - the iTunes parent category no longer carries over from one array
 *    element to the next.
 *
 * @param {Object} node Parsed channel/feed node.
 * @param {string} type Feed type, e.g. 'rss', 'atom' or 'rdf'.
 * @param {Object} [options] Only `normalize` is read.
 * @returns {Object} The meta object; `{}` when `type` or `node` is missing.
 */
FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
  if (!type || !node) return {};

  var self = this
    , meta = {}
    , normalize = !options || (options && options.normalize)
    ;

  // Uses `url` as the document base if it is absolute and none is known yet;
  // otherwise resolves it against the current base. Returns the URL to store.
  function rebase (url) {
    if (_.isAbsoluteUrl(url) && self.xmlbase && self.xmlbase.length === 0) {
      self.xmlbase.unshift({ '#name': 'xml', '#': url });
      self.stack[0] = _.reresolve(self.stack[0], url);
      return url;
    }
    if (self.xmlbase && self.xmlbase.length > 0) {
      return _.resolve(_.get(self.xmlbase[0], '#'), url);
    }
    return url;
  }

  // Applies one link that has both `href` and `rel` attributes.
  function applyLinkRel (attrs) {
    if (attrs['rel'] == 'alternate') {
      if (!meta.link) meta.link = attrs['href'];
    }
    else if (attrs['rel'] == 'self') {
      meta.xmlurl = meta.xmlUrl = rebase(attrs['href']);
    }
    else if (attrs['rel'] == 'hub' && !(meta.cloud.href || meta.cloud.domain)) {
      meta.cloud.type = 'hub';
      meta.cloud.href = attrs['href'];
    }
  }

  // Adds the category value(s) carried by one category-like element.
  function addCategories (category, name) {
    var value
      , parent = ''
      , sub
      ;
    if ('category' == name && 'atom' == type) {
      if (category['@'] && (value = _.safeTrim(_.get(category['@'], 'term')))) {
        meta.categories.push(value);
      }
    }
    else if ('category' == name && 'rss' == type) {
      if ((value = _.safeTrim(_.get(category)))) {
        meta.categories.push(value);
      }
    }
    else if ('dc:subject' == name && (value = _.safeTrim(_.get(category)))) {
      meta.categories = meta.categories.concat(
        value.split(' ').map(function (cat) { return cat.trim(); })
      );
    }
    else if ('itunes:category' == name) {
      if (category['@'] && (value = _.safeTrim(_.get(category['@'], 'text')))) {
        parent = value;
      }
      sub = category[name];
      if (sub) {
        (Array.isArray(sub) ? sub : [sub]).forEach(function (subcategory) {
          var subValue;
          if (subcategory['@'] && (subValue = _.safeTrim(_.get(subcategory['@'], 'text')))) {
            meta.categories.push(parent + '/' + subValue);
          }
        });
      }
      else if (parent) {
        meta.categories.push(parent);
      }
    }
    else if ('media:category' == name && (value = _.safeTrim(_.get(category)))) {
      meta.categories.push(value);
    }
  }

  if (normalize) {
    ['title','description','date', 'pubdate', 'pubDate','link', 'xmlurl', 'xmlUrl','author','language','favicon','copyright','generator'].forEach(function (property){
      meta[property] = null;
    });
    meta.cloud = {};
    meta.image = {};
    meta.categories = [];
  }

  Object.keys(node).forEach(function (name) {
    var el = node[name];

    if (normalize) {
      switch (name) {
      case('title'):
        meta.title = _.get(el);
        break;
      case('description'):
      case('subtitle'):
        meta.description = _.get(el);
        break;
      case('pubdate'):
      case('lastbuilddate'):
      case('published'):
      case('modified'):
      case('updated'):
      case('dc:date'):
        var date = _.get(el) ? new Date(_.get(el)) : null;
        if (!date) break;
        if (meta.pubdate === null || name == 'pubdate' || name == 'published')
          meta.pubdate = meta.pubDate = date;
        if (meta.date === null || name == 'lastbuilddate' || name == 'modified' || name == 'updated')
          meta.date = date;
        break;
      case('link'):
      case('atom:link'):
      case('atom10:link'):
        // NOTE: the array and single-element paths differ on purpose in when
        // an existing meta.link may be overwritten; kept as it was.
        if (Array.isArray(el)) {
          el.forEach(function (link) {
            if (link['@']['href']) {
              if (_.get(link['@'], 'rel')) {
                applyLinkRel(link['@']);
              } else {
                if (!meta.link) meta.link = link['@']['href'];
              }
            } else if (Object.keys(link['@']).length === 0) {
              meta.link = _.get(link);
            }
            meta.link = rebase(meta.link);
          });
        } else {
          if (el['@']['href']) {
            if (_.get(el['@'], 'rel')) {
              applyLinkRel(el['@']);
            } else {
              meta.link = el['@']['href'];
            }
          } else if (Object.keys(el['@']).length === 0) {
            if (!meta.link) meta.link = _.get(el);
          }
          meta.link = rebase(meta.link);
        }
        break;
      case('managingeditor'):
      case('webmaster'):
      case('author'):
        if (name == 'author') {
          meta.author = _.get(el.name) || _.get(el.email) || _.get(el.uri);
        }
        else if (_.get(el)) {
          // addressparser may return an empty list for unparseable text.
          var parsed = addressparser(_.get(el))[0];
          if (parsed) {
            el['name'] = parsed.name;
            el['email'] = parsed.address;
          }
          if (meta.author === null || name == 'managingeditor') {
            meta.author = (parsed && (parsed.name || parsed.address)) || _.get(el);
          }
        }
        break;
      case('cloud'):
        meta.cloud = {};
        // Only the first of several cloud elements is used.
        var cloudAttrs = (Array.isArray(el) ? el[0] : el)['@'];
        Object.keys(cloudAttrs).forEach(function (attr) {
          if (_.has(cloudAttrs, attr)) {
            meta.cloud[attr] = cloudAttrs[attr];
          }
        });
        meta.cloud.type = 'rsscloud';
        break;
      case('language'):
        meta.language = _.get(el);
        break;
      case('image'):
      case('logo'):
        if (el.url)
          meta.image.url = _.get(el.url);
        if (el.title)
          meta.image.title = _.get(el.title);
        if (!meta.image.url && _.get(el))
          meta.image.url = _.get(el);
        break;
      case('icon'):
        meta.favicon = _.get(el);
        break;
      case('copyright'):
      case('rights'):
      case('dc:rights'):
        meta.copyright = _.get(el);
        break;
      case('generator'):
        meta.generator = _.get(el);
        if (_.get(el['@'], 'version'))
          meta.generator += (meta.generator ? ' ' : '') + 'v' + el['@'].version;
        if (_.get(el['@'], 'uri'))
          meta.generator += meta.generator ? ' (' + el['@'].uri + ')' : el['@'].uri;
        break;
      case('category'):
      case('dc:subject'):
      case('itunes:category'):
      case('media:category'):
        (Array.isArray(el) ? el : [el]).forEach(function (category) {
          addCategories(category, name);
        });
        break;
      }
    }

    // Keep the raw element as well, namespaced by feed type when it has no
    // prefix of its own.
    if (name.indexOf('#') !== 0) {
      if (~name.indexOf(':')) meta[name] = el;
      else meta[type + ':' + name] = el;
    }
  }, this);

  if (normalize) {
    // Fallbacks for properties the main pass did not fill in.
    if (!meta.description) {
      if (node['itunes:summary']) meta.description = _.get(node['itunes:summary']);
      else if (node['tagline']) meta.description = _.get(node['tagline']);
    }
    if (!meta.author) {
      if (node['itunes:author']) meta.author = _.get(node['itunes:author']);
      else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) meta.author = _.get(node['itunes:owner']['itunes:name']);
      else if (node['dc:creator']) meta.author = _.get(node['dc:creator']);
      else if (node['dc:publisher']) meta.author = _.get(node['dc:publisher']);
    }
    if (!meta.language) {
      if (node['@'] && node['@']['xml:lang']) meta.language = _.get(node['@'], 'xml:lang');
      else if (node['dc:language']) meta.language = _.get(node['dc:language']);
    }
    if (!meta.image.url) {
      if (node['itunes:image']) meta.image.url = _.get(node['itunes:image']['@'], 'href');
      else if (node['media:thumbnail']) {
        if (Array.isArray(node['media:thumbnail'])) {
          node['media:thumbnail'] = node['media:thumbnail'][0];
        }
        meta.image.url = _.get(node['media:thumbnail']['@'], 'url');
      }
    }
    if (!meta.copyright) {
      if (node['media:copyright']) meta.copyright = _.get(node['media:copyright']);
      else if (node['dc:rights']) meta.copyright = _.get(node['dc:rights']);
      else if (node['creativecommons:license']) meta.copyright = _.get(node['creativecommons:license']);
      else if (node['cc:license']) {
        if (Array.isArray(node['cc:license']) && node['cc:license'][0]['@'] && node['cc:license'][0]['@']['rdf:resource']) {
          meta.copyright = _.get(node['cc:license'][0]['@'], 'rdf:resource');
        } else if (node['cc:license']['@'] && node['cc:license']['@']['rdf:resource']) {
          meta.copyright = _.get(node['cc:license']['@'], 'rdf:resource');
        }
      }
    }
    if (!meta.generator) {
      if (node['admin:generatoragent']) {
        if (Array.isArray(node['admin:generatoragent']) && node['admin:generatoragent'][0]['@'] && node['admin:generatoragent'][0]['@']['rdf:resource']) {
          meta.generator = _.get(node['admin:generatoragent'][0]['@'], 'rdf:resource');
        } else if (node['admin:generatoragent']['@'] && node['admin:generatoragent']['@']['rdf:resource']) {
          meta.generator = _.get(node['admin:generatoragent']['@'], 'rdf:resource');
        }
      }
    }
    if (meta.categories.length) {
      meta.categories = _.uniq(meta.categories);
    }
    if (!meta.link) {
      // An http(s) atom:id doubles as the link when none was given.
      if (meta['atom:id'] && _.get(meta['atom:id']) && /^https?:/.test(_.get(meta['atom:id']))) {
        meta.link = _.get(meta['atom:id']);
      }
    }
    if (!meta.xmlurl && this.options.feedurl) {
      meta.xmlurl = meta.xmlUrl = this.options.feedurl;
    }
    meta.title = meta.title && _.stripHtml(meta.title);
    meta.description = meta.description && _.stripHtml(meta.description);
  }

  return meta;
};
|
758 |
|
759 | FeedParser.prototype.handleItem = function handleItem (node, type, options){
|
760 | if (!type || !node) return {};
|
761 |
|
762 | var item = {}
|
763 | , normalize = !options || (options && options.normalize)
|
764 | ;
|
765 |
|
766 | if (normalize) {
|
767 | ['title','description','summary','date','pubdate','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){
|
768 | item[property] = null;
|
769 | });
|
770 | item.image = {};
|
771 | item.source = {};
|
772 | item.categories = [];
|
773 | item.enclosures = [];
|
774 | }
|
775 |
|
776 | Object.keys(node).forEach(function(name){
|
777 | var el = node[name]
|
778 | , attrs = _.get(el, '@')
|
779 | , enclosure;
|
780 | if (normalize) {
|
781 | switch(name){
|
782 | case('title'):
|
783 | item.title = _.get(el);
|
784 | break;
|
785 | case('description'):
|
786 | case('summary'):
|
787 | item.summary = _.get(el);
|
788 | if (!item.description) item.description = _.get(el);
|
789 | break;
|
790 | case('content'):
|
791 | case('content:encoded'):
|
792 | item.description = _.get(el);
|
793 | break;
|
794 | case('pubdate'):
|
795 | case('published'):
|
796 | case('issued'):
|
797 | case('modified'):
|
798 | case('updated'):
|
799 | case('dc:date'):
|
800 | var date = _.get(el) ? new Date(_.get(el)) : null;
|
801 | if (!date) break;
|
802 | if (item.pubdate === null || name == 'pubdate' || name == 'published' || name == 'issued')
|
803 | item.pubdate = item.pubDate = date;
|
804 | if (item.date === null || name == 'modified' || name == 'updated')
|
805 | item.date = date;
|
806 | break;
|
807 | case('link'):
|
808 | if (Array.isArray(el)) {
|
809 | el.forEach(function (link){
|
810 | if (link['@']['href']) {
|
811 | if (_.get(link['@'], 'rel')) {
|
812 | if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href'];
|
813 | if (link['@']['rel'] == 'alternate') item.link = link['@']['href'];
|
814 | if (link['@']['rel'] == 'self' && !item.link) item.link = link['@']['href'];
|
815 | if (link['@']['rel'] == 'replies') item.comments = link['@']['href'];
|
816 | if (link['@']['rel'] == 'enclosure') {
|
817 | enclosure = {};
|
818 | enclosure.url = link['@']['href'];
|
819 | enclosure.type = _.get(link['@'], 'type');
|
820 | enclosure.length = _.get(link['@'], 'length');
|
821 | if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) {
|
822 | item.enclosures.push(enclosure);
|
823 | }
|
824 | }
|
825 | } else {
|
826 | item.link = link['@']['href'];
|
827 | }
|
828 | } else if (Object.keys(link['@']).length === 0) {
|
829 | if (!item.link) item.link = _.get(link);
|
830 | }
|
831 | });
|
832 | } else {
|
833 | if (el['@']['href']) {
|
834 | if (_.get(el['@'], 'rel')) {
|
835 | if (el['@']['rel'] == 'canonical') item.origlink = el['@']['href'];
|
836 | if (el['@']['rel'] == 'alternate') item.link = el['@']['href'];
|
837 | if (el['@']['rel'] == 'self' && !item.link) item.link = el['@']['href'];
|
838 | if (el['@']['rel'] == 'replies') item.comments = el['@']['href'];
|
839 | if (el['@']['rel'] == 'enclosure') {
|
840 | enclosure = {};
|
841 | enclosure.url = el['@']['href'];
|
842 | enclosure.type = _.get(el['@'], 'type');
|
843 | enclosure.length = _.get(el['@'], 'length');
|
844 | if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) {
|
845 | item.enclosures.push(enclosure);
|
846 | }
|
847 | }
|
848 | } else {
|
849 | item.link = el['@']['href'];
|
850 | }
|
851 | } else if (Object.keys(el['@']).length === 0) {
|
852 | if (!item.link) item.link = _.get(el);
|
853 | }
|
854 | }
|
855 | if (!item.guid) item.guid = item.link;
|
856 | break;
|
857 | case('guid'):
|
858 | case('id'):
|
859 | item.guid = _.get(el);
|
860 |
|
861 |
|
862 |
|
863 |
|
864 |
|
865 |
|
866 |
|
867 |
|
868 | if (item.guid && type == 'rss' && name == 'guid' && !(attrs.ispermalink && attrs.ispermalink.match(/false/i))) {
|
869 | item.permalink = item.guid;
|
870 | }
|
871 | break;
|
872 | case('author'):
|
873 | var author = {};
|
874 | if (_.get(el)) {
|
875 | author = addressparser(_.get(el))[0];
|
876 | if (author) {
|
877 | el['name'] = author.name;
|
878 | el['email'] = author.address;
|
879 | item.author = author.name || author.address;
|
880 | }
|
881 |
|
882 | else {
|
883 | item.author = _.get(el);
|
884 | }
|
885 | } else {
|
886 | item.author = _.get(el.name) || _.get(el.email) || _.get(el.uri);
|
887 | }
|
888 | break;
|
889 | case('dc:creator'):
|
890 | item.author = _.get(el);
|
891 | break;
|
892 | case('comments'):
|
893 | item.comments = _.get(el);
|
894 | break;
|
895 | case('source'):
|
896 | if ('rss' == type) {
|
897 | item.source['title'] = _.get(el);
|
898 | item.source['url'] = _.get(el['@'], 'url');
|
899 | } else if ('atom' == type) {
|
900 | if (el.title && _.get(el.title))
|
901 | item.source['title'] = _.get(el.title);
|
902 | if (el.link && _.get(el.link['@'], 'href'))
|
903 | item.source['url'] = _.get(el.link['@'], 'href');
|
904 | }
|
905 | if (item.source['url'] && !this.meta.xmlurl) {
|
906 | this.meta.xmlurl = this.meta.xmlUrl = item.source['url'];
|
907 | if (_.isAbsoluteUrl(item.source['url']) && this.xmlbase && this.xmlbase.length === 0) {
|
908 | this.xmlbase.unshift({ '#name': 'xml', '#': item.source['url']});
|
909 | this.stack[0] = _.reresolve(this.stack[0], item.source['url']);
|
910 | }
|
911 | else if (this.xmlbase && this.xmlbase.length > 0) {
|
912 | this.meta.xmlurl = this.meta.xmlUrl = item.source['url'] = _.resolve(_.get(this.xmlbase[0], '#'), item.source['url']);
|
913 | }
|
914 | }
|
915 | break;
|
916 | case('enclosure'):
|
917 | if (Array.isArray(el)) {
|
918 | el.forEach(function (enc){
|
919 | enclosure = {};
|
920 | enclosure.url = _.get(enc['@'], 'url');
|
921 | enclosure.type = _.get(enc['@'], 'type');
|
922 | enclosure.length = _.get(enc['@'], 'length');
|
923 | if (~indexOfObject(item.enclosures, enclosure, ['url', 'type'])) {
|
924 | item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure);
|
925 | } else {
|
926 | item.enclosures.push(enclosure);
|
927 | }
|
928 | });
|
929 | } else {
|
930 | enclosure = {};
|
931 | enclosure.url = _.get(el['@'], 'url');
|
932 | enclosure.type = _.get(el['@'], 'type');
|
933 | enclosure.length = _.get(el['@'], 'length');
|
934 | if (~indexOfObject(item.enclosures, enclosure, ['url', 'type'])) {
|
935 | item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure);
|
936 | } else {
|
937 | item.enclosures.push(enclosure);
|
938 | }
|
939 | }
|
940 | break;
|
941 | case('media:content'):
|
942 | var optionalAttributes = ['bitrate', 'framerate', 'samplingrate', 'duration', 'height', 'width'];
|
943 | if (Array.isArray(el)) {
|
944 | el.forEach(function (enc){
|
945 | enclosure = {};
|
946 | enclosure.url = _.get(enc['@'], 'url');
|
947 | enclosure.type = _.get(enc['@'], 'type') || _.get(enc['@'], 'medium');
|
948 | enclosure.length = _.get(enc['@'], 'filesize');
|
949 | var index = indexOfObject(item.enclosures, enclosure, ['url', 'type']);
|
950 | if (index !== -1) {
|
951 | enclosure = item.enclosures[index];
|
952 | }
|
953 | optionalAttributes.forEach(function (attribute) {
|
954 | if (!enclosure[attribute] && _.get(enc['@'], attribute)) {
|
955 | enclosure[attribute] = _.get(enc['@'], attribute);
|
956 | }
|
957 | });
|
958 | if (index === -1) {
|
959 | item.enclosures.push(enclosure);
|
960 | }
|
961 | });
|
962 | } else {
|
963 | enclosure = {};
|
964 | enclosure.url = _.get(el['@'], 'url');
|
965 | enclosure.type = _.get(el['@'], 'type') || _.get(el['@'], 'medium');
|
966 | enclosure.length = _.get(el['@'], 'filesize');
|
967 | var index = indexOfObject(item.enclosures, enclosure, ['url', 'type']);
|
968 | if (index !== -1) {
|
969 | enclosure = item.enclosures[index];
|
970 | }
|
971 | optionalAttributes.forEach(function (attribute) {
|
972 | if (!enclosure[attribute] && _.get(el['@'], attribute)) {
|
973 | enclosure[attribute] = _.get(el['@'], attribute);
|
974 | }
|
975 | });
|
976 | if (index === -1) {
|
977 | item.enclosures.push(enclosure);
|
978 | }
|
979 | }
|
980 | break;
|
981 | case('enc:enclosure'):
|
982 | break;
|
983 | case('category'):
|
984 | case('dc:subject'):
|
985 | case('itunes:category'):
|
986 | case('media:category'):
|
987 | |
988 |
|
989 |
|
990 | var _category = ''
|
991 | , _categories = []
|
992 | ;
|
993 | if (Array.isArray(el)) {
|
994 | el.forEach(function (category){
|
995 | if ('category' == name && 'atom' == type) {
|
996 | if (category['@'] && _.get(category['@'], 'term')) item.categories.push(_.get(category['@'], 'term'));
|
997 | } else if ('category' == name && _.get(category) && 'rss' == type) {
|
998 | item.categories.push(_.get(category).trim());
|
999 | } else if ('dc:subject' == name && _.get(category)) {
|
1000 | _categories = _.get(category).split(' ').map(function (cat){ return cat.trim(); });
|
1001 | if (_categories.length) item.categories = item.categories.concat(_categories);
|
1002 | } else if ('itunes:category' == name) {
|
1003 | if (category['@'] && _.get(category['@'], 'text')) _category = _.get(category['@'], 'text');
|
1004 | if (category[name]) {
|
1005 | if (Array.isArray(category[name])) {
|
1006 | category[name].forEach(function (subcategory){
|
1007 | if (subcategory['@'] && _.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + _.get(subcategory['@'], 'text'));
|
1008 | });
|
1009 | } else {
|
1010 | if (category[name]['@'] && _.get(category[name]['@'], 'text'))
|
1011 | item.categories.push(_category + '/' + _.get(category[name]['@'], 'text'));
|
1012 | }
|
1013 | } else {
|
1014 | item.categories.push(_category);
|
1015 | }
|
1016 | } else if ('media:category' == name) {
|
1017 | item.categories.push(_.get(category));
|
1018 | }
|
1019 | });
|
1020 | } else {
|
1021 | if ('category' == name && 'atom' == type) {
|
1022 | if (_.get(el['@'], 'term')) item.categories.push(_.get(el['@'], 'term'));
|
1023 | } else if ('category' == name && _.get(el) && 'rss' == type) {
|
1024 | item.categories.push(_.get(el).trim());
|
1025 | } else if ('dc:subject' == name && _.get(el)) {
|
1026 | _categories = _.get(el).split(' ').map(function (cat){ return cat.trim(); });
|
1027 | if (_categories.length) item.categories = item.categories.concat(_categories);
|
1028 | } else if ('itunes:category' == name) {
|
1029 | if (el['@'] && _.get(el['@'], 'text')) _category = _.get(el['@'], 'text');
|
1030 | if (el[name]) {
|
1031 | if (Array.isArray(el[name])) {
|
1032 | el[name].forEach(function (subcategory){
|
1033 | if (subcategory['@'] && _.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + _.get(subcategory['@'], 'text'));
|
1034 | });
|
1035 | } else {
|
1036 | if (el[name]['@'] && _.get(el[name]['@'], 'text'))
|
1037 | item.categories.push(_category + '/' + _.get(el[name]['@'], 'text'));
|
1038 | }
|
1039 | } else {
|
1040 | item.categories.push(_category);
|
1041 | }
|
1042 | } else if ('media:category' == name) {
|
1043 | item.categories.push(_.get(el));
|
1044 | }
|
1045 | }
|
1046 | break;
|
1047 | case('feedburner:origlink'):
|
1048 | case('pheedo:origlink'):
|
1049 | if (!item.origlink) {
|
1050 | item.origlink = _.get(el);
|
1051 | }
|
1052 | break;
|
1053 | }
|
1054 | }
|
1055 |
|
1056 | if (name.indexOf('#') !== 0) {
|
1057 | if (~name.indexOf(':')) item[name] = el;
|
1058 | else item[type + ':' + name] = el;
|
1059 | }
|
1060 | }, this);
|
1061 |
|
1062 | if (normalize) {
|
1063 | if (!item.description) {
|
1064 | if (node['itunes:summary']) item.description = _.get(node['itunes:summary']);
|
1065 | }
|
1066 | if (!item.author) {
|
1067 | if (node['itunes:author']) item.author = _.get(node['itunes:author']);
|
1068 | else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = _.get(node['itunes:owner']['itunes:name']);
|
1069 | else if (node['dc:publisher']) item.author = _.get(node['dc:publisher']);
|
1070 | }
|
1071 | if (!item.image.url) {
|
1072 | if (node['itunes:image']) item.image.url = _.get(node['itunes:image']['@'], 'href');
|
1073 | else if (node['media:thumbnail']) {
|
1074 | if (Array.isArray(node['media:thumbnail'])) {
|
1075 | item.image.url = _.get(node['media:thumbnail'][0]['@'], 'url');
|
1076 | } else {
|
1077 | item.image.url = _.get(node['media:thumbnail']['@'], 'url');
|
1078 | }
|
1079 | }
|
1080 | else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = _.get(node['media:content']['media:thumbnail']['@'], 'url');
|
1081 | else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = _.get(node['media:group']['media:thumbnail']['@'], 'url');
|
1082 | else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = _.get(node['media:group']['media:content']['media:thumbnail']['@'], 'url');
|
1083 | else if (node['g:image_link']) item.image.url = _.get(node['g:image_link']);
|
1084 | }
|
1085 | if (item.categories.length) {
|
1086 | item.categories = _.uniq(item.categories);
|
1087 | }
|
1088 | if (!item.link) {
|
1089 | if (item.guid && /^https?:/.test(item.guid)) {
|
1090 | item.link = item.guid;
|
1091 | }
|
1092 | }
|
1093 | item.title = item.title && _.stripHtml(item.title);
|
1094 | }
|
1095 | return item;
|
1096 | };
|
1097 |
|
1098 |
|
/**
 * Transform hook: hands one incoming chunk to the underlying sax stream.
 *
 * @param {*} data - chunk passed straight through to `this.stream.write()`
 * @param {string} encoding - encoding of the chunk; not used by this method
 * @param {Function} done - completion callback; called exactly once, with the
 *   caught error as its argument if writing to the sax stream threw
 */
FeedParser.prototype._transform = function (data, encoding, done) {
  try {
    this.stream.write(data);
  }
  catch (e) {
    done(e);
    // The chunk could not be written, so end the readable side manually.
    this.push(null);
    return;
  }
  // Signalled outside the try block on purpose: an exception raised while the
  // callback runs must not be caught above and answered with a second
  // done(e) call (stream callbacks may only be invoked once per chunk).
  done();
};
|
1109 |
|
/**
 * Flush hook: ends the underlying sax stream once all input has been written.
 *
 * @param {Function} done - completion callback; called exactly once, with the
 *   caught error as its argument if ending the sax stream threw
 */
FeedParser.prototype._flush = function (done) {
  try {
    this.stream.end();
  }
  catch (e) {
    done(e);
    return;
  }
  // Kept outside the try block so that an exception thrown while the callback
  // runs is not caught above and followed by a second done(e) invocation.
  done();
};
|
1119 |
|
1120 | exports = module.exports = FeedParser;
|