1 |
|
2 | 'use strict';
|
3 |
|
4 | var Crawler = require('../lib/crawler');
|
5 | var expect = require('chai').expect;
|
6 | var given = require('mocha-testdata');
|
7 | var jsdom = require('jsdom');
|
8 | var c;
|
9 |
|
10 | describe('Jquery testing', function() {
|
// Once each test has finished, swap the shared crawler reference for an
// empty object so the instance created by that test is no longer held in `c`.
afterEach(function() {
    c = {};
});
|
describe('Jquery parsing', function() {
    // The test body runs once per data item: the required jsdom module,
    // then the library names 'cheerio' and 'whacko'.
    given.async(jsdom, 'cheerio', 'whacko').it('should work on inline html', function(done, jquery) {
        // Crawler callback: expects no error and a `res.$` that can read the
        // text of the <i> element from the inline markup, then ends the test.
        var verifyParsedMarkup = function(error, res) {
            expect(error).to.be.null;
            expect(res.$('i').html()).to.equal('great!');
            done();
        };

        // Here the library choice and the callback travel with the queued
        // task; the crawler itself is built with no options.
        var task = {
            html: '<p><i>great!</i></p>',
            jquery: jquery,
            callback: verifyParsedMarkup
        };

        c = new Crawler();
        c.queue([task]);
    });
});
|
describe('Jquery injection', function() {
    // Inline markup queued by every test in this group.
    var INLINE_HTML = '<p><i>great!</i></p>';

    /**
     * Builds a two-argument crawler callback that expects no error and a
     * `res.$` able to read the text of the <i> element, then ends the test.
     */
    function expectInjected(done) {
        return function(error, res) {
            expect(error).to.be.null;
            expect(res.$('i').html()).to.equal('great!');
            done();
        };
    }

    /**
     * Builds a two-argument crawler callback that expects no error and no
     * `$` property on the result, then ends the test.
     */
    function expectNotInjected(done) {
        return function(error, res) {
            expect(error).to.be.null;
            expect(res.$).to.be.undefined;
            done();
        };
    }

    /**
     * Stores a new Crawler built from `crawlerOptions` in the shared `c`
     * and queues `task` as a one-element list.
     */
    function crawl(crawlerOptions, task) {
        c = new Crawler(crawlerOptions);
        c.queue([task]);
    }

    /**
     * Same as `crawl`, but registers `done` as the 'drain' listener before
     * queuing, for callbacks that finish by calling `next()`.
     */
    function crawlUntilDrain(crawlerOptions, task, done) {
        c = new Crawler(crawlerOptions);
        c.on('drain', done);
        c.queue([task]);
    }

    // NOTE(review): the title says "by default", yet `jquery: true` is passed
    // explicitly (along with `html` in the constructor options) - confirm
    // whether a crawler built without the option was meant to be tested.
    it('should enable cheerio by default', function(done) {
        crawl({
            html: INLINE_HTML,
            jquery: true,
            callback: expectInjected(done)
        }, {html: INLINE_HTML});
    });

    // Single data item: the required jsdom module itself.
    given.async(jsdom).it('should enable jsdom if set', function(done, library) {
        crawl({
            jquery: library,
            callback: expectInjected(done)
        }, {html: INLINE_HTML});
    });

    // Cheerio selected both by plain name and by a `{name: ...}` object.
    given.async('cheerio', {name: 'cheerio'}).it('should enable cheerio if set', function(done, library) {
        crawl({
            jquery: library,
            callback: expectInjected(done)
        }, {html: INLINE_HTML});
    });

    it('should enable whacko if set', function(done) {
        crawl({
            jquery: 'whacko',
            callback: expectInjected(done)
        }, {html: INLINE_HTML});
    });

    it('should disable jQuery if set to false', function(done) {
        crawl({
            jQuery: false,
            callback: expectNotInjected(done)
        }, {html: INLINE_HTML});
    });

    // An unrecognised name, null and undefined are all expected to leave
    // `res.$` unset.
    given.async('trucmuch', null, undefined).it('should not inject jquery', function(done, library) {
        crawl({
            jquery: library,
            callback: expectNotInjected(done)
        }, {html: INLINE_HTML});
    });

    // NOTE(review): the title talks about an empty body, but the queued task
    // carries the usual non-empty markup - confirm the intended input.
    given.async('cheerio', jsdom).it('should also enable jQuery even if body is empty, to prevent `$ is not a function` error', function(done, library) {
        crawl({
            jQuery: library,
            callback: expectInjected(done)
        }, {html: INLINE_HTML});
    });

    // NOTE(review): the title says jQuery should be disabled for a body that
    // is not text/html, yet this test queues the same inline HTML and makes
    // the same "injected" assertions as the previous one - confirm the
    // intended scenario and expectation.
    given.async('cheerio', jsdom).it('should disable jQuery if body is not text/html ', function(done, library) {
        crawl({
            jQuery: library,
            callback: expectInjected(done)
        }, {html: INLINE_HTML});
    });

    it('should work if jquery is set instead of jQuery when building Crawler', function(done) {
        var crawlerOptions = {
            maxConnections: 10,
            jquery: true,
            callback: function(error, res, next) {
                // The lowercase key given above is expected to show up on the
                // result options as `jQuery`, with `jquery` itself absent.
                expect(res.$).not.to.be.undefined;
                expect(res.options.jQuery).to.be.true;
                expect(res.options.jquery).to.be.undefined;
                next();
            }
        };

        crawlUntilDrain(crawlerOptions, {html: INLINE_HTML}, done);
    });

    it('should work if jquery is set instead of jQuery when queuing', function(done) {
        var crawlerOptions = {
            maxConnections: 10,
            jQuery: true,
            callback: function(error, res, next) {
                // The task-level `jquery: false` is expected to win over the
                // crawler-level `jQuery: true`.
                expect(res.$).to.be.undefined;
                expect(res.options.jQuery).to.be.false;
                next();
            }
        };
        var task = {
            html: INLINE_HTML,
            jquery: false
        };

        crawlUntilDrain(crawlerOptions, task, done);
    });

    it('should not inject jquery if jquery is set to undefined', function(done) {
        var crawlerOptions = {
            maxConnections: 10,
            jquery: undefined,
            callback: function(error, res, next) {
                expect(res.$).to.be.undefined;
                expect(res.options.jQuery).to.be.undefined;
                next();
            }
        };

        crawlUntilDrain(crawlerOptions, {html: INLINE_HTML}, done);
    });
});
|
describe('Cheerio specific test', function() {
    it('should inject cheerio with options', function(done) {
        // Crawler callback: expects no error and a `res.$` that can read the
        // <i> text, then hands control back through `next` so the crawler can
        // emit 'drain'.
        var verifyInjected = function(error, res, next) {
            expect(error).to.be.null;
            expect(res.$('i').html()).to.equal('great!');
            next();
        };

        c = new Crawler({
            maxConnections: 10,
            // Library selected by name plus a nested `options` object
            // (presumably forwarded to cheerio - confirm against the crawler).
            jquery: {
                name: 'cheerio',
                options: {
                    normalizeWhitespace: true,
                    xmlMode: true
                }
            },
            callback: verifyInjected
        });

        // The test ends when the crawler reports 'drain'.
        c.on('drain', done);
        c.queue([{html: '<p><i>great!</i></p>'}]);
    });
});
|
192 | });
|