1 |
|
2 | 'use strict';
|
3 |
|
4 | const Crawler = require('../lib/crawler');
|
5 | const expect = require('chai').expect;
|
6 | const sinon = require('sinon');
|
7 |
|
8 |
|
9 | const nock = require('nock');
|
10 |
|
11 |
|
// Sinon spy shared by the tests below; the beforeEach hook replaces it with a
// fresh spy, and the hooks under test record 'preRequest' / 'callback' into it.
12 | let cb;
|
// Crawler instance under test; every test assigns a newly constructed Crawler.
13 | let crawler;
|
14 |
|
15 | describe('preRequest feature tests', function() {
|
16 |
|
/**
 * Suite setup: remove every previously registered nock interceptor, then
 * answer GET http://test.crawler.com/ with `200 ok`.
 */
before(function() {
  nock.cleanAll();
  // persist() keeps the interceptor active for more than one matching request;
  // the tests below issue several requests against the same URL.
  nock('http://test.crawler.com').get('/').reply(200, 'ok').persist();
});

/**
 * Suite teardown: a persisted interceptor is never consumed, so remove it
 * explicitly to keep the mock from leaking into suites that run afterwards.
 */
after(function() {
  nock.cleanAll();
});
|
21 |
|
// Start every test with a brand-new spy so calls recorded by one test are
// never visible to the next.
beforeEach(() => {
  cb = sinon.spy();
});
|
25 |
|
// preRequest supplied through the Crawler constructor: by the time the request
// callback runs, the spy must hold exactly one 'preRequest' call, even though
// the hook only completes after a 50 ms timer.
it('should do preRequest before request when preRequest defined in crawler options', function(finishTest) {
  const delayedPreRequest = (options, done) => {
    setTimeout(() => {
      cb('preRequest');
      done();
    }, 50);
  };

  const verifyHookRanFirst = (error, response, done) => {
    expect(error).to.be.null;
    const recorded = cb.getCalls();
    expect(recorded.length).to.equal(1);
    expect(recorded[0].args[0]).to.equal('preRequest');
    done();
    finishTest();
  };

  crawler = new Crawler({
    jQuery: false,
    preRequest: delayedPreRequest
  });
  crawler.queue({
    uri: 'http://test.crawler.com/',
    callback: verifyHookRanFirst
  });
});
|
47 |
|
// Same check as the crawler-level variant, but the preRequest hook is passed
// per request through queue() instead of through the constructor.
it('should do preRequest before request when preRequest defined in queue options', function(finishTest) {
  const delayedPreRequest = (options, done) => {
    setTimeout(() => {
      cb('preRequest');
      done();
    }, 50);
  };

  const verifyHookRanFirst = (error, response, done) => {
    expect(error).to.be.null;
    const recorded = cb.getCalls();
    expect(recorded.length).to.equal(1);
    expect(recorded[0].args[0]).to.equal('preRequest');
    done();
    finishTest();
  };

  crawler = new Crawler({ jQuery: false });
  crawler.queue({
    uri: 'http://test.crawler.com/',
    preRequest: delayedPreRequest,
    callback: verifyHookRanFirst
  });
});
|
67 |
|
// Queues five requests that record 'preRequest' and 'callback' into the spy,
// followed by one sentinel request whose own hooks record nothing. The
// sentinel's callback then checks that the spy saw strictly alternating
// 'preRequest', 'callback' pairs, one pair per recorded request.
// NOTE(review): relies on the sentinel being handled after the five recorded
// requests (presumably guaranteed by rateLimit) - confirm against the crawler.
it('preRequest should be executed the same times as request', function(finishTest) {
  const recordedRequests = 5;

  crawler = new Crawler({
    jQuery: false,
    rateLimit: 50,
    preRequest: (options, done) => {
      cb('preRequest');
      done();
    },
    callback: (error, response, done) => {
      expect(error).to.be.null;
      cb('callback');
      done();
    }
  });

  // Expected spy log, built alongside the queue() calls.
  const seq = [];
  for (let i = 0; i < recordedRequests; i++) {
    crawler.queue('http://test.crawler.com/');
    seq.push('preRequest', 'callback');
  }

  crawler.queue({
    uri: 'http://test.crawler.com/',
    // Per-request overrides: the sentinel must not add entries to the spy.
    preRequest: (options, done) => { done(); },
    callback: (error, response, done) => {
      // Compare arrays rather than joined strings so a failure shows which
      // entry is out of place.
      expect(cb.getCalls().map(c => c.args[0])).to.deep.equal(seq);
      done();
      finishTest();
    }
  });
});
|
98 |
|
// A preRequest hook that always reports a plain Error: the callback must
// receive an error, and the spy must show four hook invocations in total
// (per the test title, the first attempt plus three retries).
it('when preRequest fail, should retry three times by default', function(finishTest) {
  const alwaysFailingPreRequest = (options, done) => {
    cb('preRequest');
    done(new Error());
  };

  const verifyRetriesExhausted = (error, response, done) => {
    expect(error).to.exist;
    const attempts = cb.getCalls().length;
    expect(attempts).to.equal(4);
    done();
    finishTest();
  };

  crawler = new Crawler({
    jQuery: false,
    rateLimit: 20,
    retryTimeout: 0,
    preRequest: alwaysFailingPreRequest,
    callback: verifyRetriesExhausted
  });
  crawler.queue('http://test.crawler.com/');
});
|
117 |
|
// An error tagged op = 'fail' must reach the callback after a single
// preRequest invocation, i.e. the spy records exactly one call.
it('when preRequest fail, should return error when error.op = \'fail\'', function(finishTest) {
  const failImmediately = (options, done) => {
    cb('preRequest');
    const failure = Object.assign(new Error(), { op: 'fail' });
    done(failure);
  };

  const verifySingleAttempt = (error, response, done) => {
    expect(error).to.exist;
    const attempts = cb.getCalls().length;
    expect(attempts).to.equal(1);
    done();
    finishTest();
  };

  crawler = new Crawler({
    jQuery: false,
    rateLimit: 20,
    retryTimeout: 0,
    preRequest: failImmediately,
    callback: verifySingleAttempt
  });
  crawler.queue('http://test.crawler.com/');
});
|
138 |
|
// An error tagged op = 'abort' must drop the request silently: the callback is
// never invoked and preRequest is not run again. The check happens 100 ms
// after the abort so that a stray callback or retry has time to show up.
it('when preRequest fail, callback should not be called when error.op = \'abort\'', function(finishTest) {
  crawler = new Crawler({
    jQuery: false,
    rateLimit: 20,
    retries: 0,
    preRequest: (options, done) => {
      cb('preRequest');
      const error = new Error();
      error.op = 'abort';
      done(error);
      setTimeout(function() {
        const calls = cb.getCalls();
        expect(calls.length).to.equal(1);
        expect(calls[0].args[0]).to.equal('preRequest');
        finishTest();
      }, 100);
    },
    callback: () => {
      // Reaching this point is the failure; say so explicitly instead of
      // relying on an unrelated always-false assertion.
      throw new Error('callback must not be called when preRequest aborts the request');
    }
  });
  crawler.queue('http://test.crawler.com/');
});
|
163 |
|
// An error tagged op = 'queue' must put the request back in the queue: the
// hook answers 'queue' three times and expects to be invoked a fourth time,
// at which point it verifies the spy log and ends the cycle with 'abort'.
it('when preRequest fail, should put request back in queue when error.op = \'queue\'', function(finishTest) {
  const requeueCount = 3;
  let counter = 0;

  crawler = new Crawler({
    jQuery: false,
    rateLimit: 20,
    preRequest: (options, done) => {
      // options.retries must read 3 on every invocation, i.e. re-queueing does
      // not decrement it (3 is presumably the crawler default - confirm).
      expect(options.retries).to.equal(3);

      counter += 1;
      const error = new Error();

      if (counter <= requeueCount) {
        error.op = 'queue';
        cb('preRequest');
        done(error);
        return;
      }

      // Fourth invocation: the three re-queued attempts must all be recorded.
      const calls = cb.getCalls();
      expect(calls.length).to.equal(requeueCount);
      calls.forEach((call) => {
        expect(call.args[0]).to.equal('preRequest');
      });

      error.op = 'abort';
      cb('preRequest');
      // Hand the abort to the crawler before signalling completion, so a
      // failure in the abort path is still attributed to this test.
      done(error);
      finishTest();
    },
    callback: () => {
      // Reaching this point is the failure; say so explicitly instead of
      // relying on an unrelated always-false assertion.
      throw new Error('callback must not be called when preRequest re-queues or aborts the request');
    }
  });
  crawler.queue('http://test.crawler.com/');
});
|
191 | }); |
\ | No newline at end of file |