1 |
|
2 | var Container, DOMParser, Extractor, FindType, Promise, xpath;
|
3 |
|
4 | Container = require('./container');
|
5 |
|
6 | xpath = require('xpath');
|
7 |
|
8 | DOMParser = require('xmldom').DOMParser;
|
9 |
|
10 | Promise = require('bluebird');
|
11 |
|
/**
 * Identifiers for the query engine a Container was created for.
 *
 * `jquery`, `qsa` and `cheerio` are aliases that share the value 0, while
 * `xpath` has its own value 1.
 */
FindType = (function() {
  var SELECTOR_ENGINE = 0;
  var XPATH_ENGINE = 1;

  return {
    jquery: SELECTOR_ENGINE,
    qsa: SELECTOR_ENGINE,
    cheerio: SELECTOR_ENGINE,
    xpath: XPATH_ENGINE
  };
})();
|
18 |
|
/**
 * Extractor runs a query against the page handled by a scraper and delivers
 * the matches inside a Container, both through an optional callback and
 * through the returned promise.
 */
Extractor = (function() {
  /**
   * @param {*} _url - stored as-is and passed as the first argument to every
   *   Container this extractor creates.
   * @param {Object} _scraper - must provide `scrape(extractFn, doneFn)`;
   *   `findXpath` additionally reads the markup from `_scraper.scraper.body`.
   */
  function Extractor(_url, _scraper) {
    this._url = _url;
    this._scraper = _scraper;
  }

  /**
   * Rejects the pending promise with `err` and then rethrows it, so that
   * whatever error handling the scraper applies to its steps still observes
   * the exception exactly as it would without the promise wrapper.
   */
  function failAndRethrow(reject, err) {
    reject(err);
    throw err;
  }

  /**
   * Builds the completion step handed to `scrape`: it notifies the optional
   * callback with the container and then resolves the promise with it.
   */
  function completion(container, callback, resolve, reject) {
    return function() {
      if (typeof callback === 'function') {
        try {
          callback(container);
        } catch (err) {
          // resolve() is only reached after the callback returns, so without
          // this the promise would stay pending forever when it throws.
          failAndRethrow(reject, err);
        }
      }
      return resolve(container);
    };
  }

  /**
   * Evaluates `query` with the `$` function supplied by the scraper and stores
   * the result on the container's `_find` property.
   *
   * @param {*} query - passed unchanged to `$`.
   * @param {Function} [callback] - called with the container once the scraper
   *   reports completion; ignored when it is not a function.
   * @returns {Promise} resolves with the container; rejects when evaluating
   *   the query or running the callback throws.
   */
  Extractor.prototype.find = function(query, callback) {
    var self = this;
    var container = new Container(this._url, this._scraper, FindType.cheerio);
    return new Promise(function(resolve, reject) {
      return self._scraper.scrape(function($) {
        try {
          return container._find = $(query);
        } catch (err) {
          // The completion step may never run after a failure here, so the
          // promise has to be settled explicitly.
          failAndRethrow(reject, err);
        }
      }, completion(container, callback, resolve, reject));
    });
  };

  /**
   * Parses the scraper's body as "text/html" with DOMParser, evaluates the
   * XPath expression `query` against the resulting document and stores the
   * selected nodes on the container's `_find` property.
   *
   * @param {string} query - XPath expression handed to `xpath.select`.
   * @param {Function} [callback] - called with the container once the scraper
   *   reports completion; ignored when it is not a function.
   * @returns {Promise} resolves with the container; rejects when parsing,
   *   selecting or running the callback throws.
   */
  Extractor.prototype.findXpath = function(query, callback) {
    var self = this;
    var container = new Container(this._url, this._scraper, FindType.xpath);
    return new Promise(function(resolve, reject) {
      return self._scraper.scrape(function() {
        try {
          var doc = new DOMParser().parseFromString(self._scraper.scraper.body, "text/html");
          return container._find = xpath.select(query, doc);
        } catch (err) {
          // Same reasoning as in find(): settle the promise before letting
          // the exception travel on to the scraper.
          failAndRethrow(reject, err);
        }
      }, completion(container, callback, resolve, reject));
    });
  };

  return Extractor;

})();
|
63 |
|
// Public surface of this module: the Extractor constructor and the FindType
// constants.
exports.Extractor = Extractor;
exports.FindType = FindType;
|