1 |
|
2 |
|
3 | 'use strict';
|
4 |
|
5 | const Crawler = require('../lib/crawler');
|
6 | const expect = require('chai').expect;
|
7 | const nock = require('nock');
|
8 |
|
describe('Request tests', function() {
	// Start from a clean slate in case another test file left interceptors behind.
	before(function() {
		nock.cleanAll();
	});

	let crawler = null;
	let scope = null;
	const origin = 'http://www.whatever.com';
	const path = '/get';
	const headerPath = '/header';

	beforeEach(function() {
		crawler = new Crawler({
			retries: 0,
			json: true,
			jQuery: false,
		});

		// `/get` always answers 200; persisted so a single test may hit it more than once.
		scope = nock(origin).get(path).reply(200).persist();
		// `/header` echoes the request headers back as the JSON body, so tests
		// can assert on what the crawler actually sent over the wire.
		nock(origin).get(headerPath).reply(function(){
			return [200, this.req.headers, { 'Content-Type': 'application/json' }];
		});
	});

	afterEach(function() {
		// FIX: `scope.persist(false)` alone only stopped persisting the `/get`
		// mock; the non-persisted `/header` interceptor added in every
		// beforeEach was never removed when a test did not request it, so
		// pending mocks accumulated and could leak into later tests.
		// nock.cleanAll() removes both persisted and pending interceptors.
		nock.cleanAll();
		crawler = null;
	});

	// Single request: callback fires with no error and a 200 status.
	it('should crawl one request', function(end) {
		crawler.queue({uri: `${origin}${path}`, callback: (error, res, done) => {
			expect(error).to.be.null;
			expect(res.statusCode).to.eql(200);
			done();
			end();
		}});
	});

	// Two queued requests: both succeed, then the 'drain' event ends the test.
	it('should crawl two request request and execute the onDrain() callback', function(done) {
		const callback = function(error, res,next) {
			expect(error).to.be.null;
			expect(res.statusCode).to.eql(200);
			next();
		};

		crawler.on('drain',done);

		crawler.queue({
			uri: `${origin}${path}`,
			callback: callback
		});

		crawler.queue({
			uri: `${origin}${path}`,
			callback: callback
		});
	});

	// The crawler should advertise gzip support by default.
	it('should contain gzip header', function(end) {
		crawler.queue({uri: `${origin}${headerPath}`, callback:function(error, res, done) {
			expect(error).to.be.null;
			expect(res.body['accept-encoding']).to.match(/gzip/);
			done();
			end();
		}});
	});

	// Per-request `userAgent` option is sent as the User-Agent header.
	it('should use the provided user-agent', function(end) {
		const ua = 'test/1.2';
		crawler.queue({
			uri: `${origin}${headerPath}`,
			userAgent: ua,
			callback:function(error, res, done) {
				expect(error).to.be.null;
				expect(res.body['user-agent']).to.eql(ua);
				done();
				end();
			}
		});
	});

	// A per-request headers.User-Agent overrides the crawler-wide headers option.
	it('should replace the global User-Agent', function(end) {
		crawler = new Crawler({
			headers:{'User-Agent': 'test/1.2'},
			jQuery: false,
			json: true,
			callback:function(error, res, done) {
				expect(error).to.be.null;
				expect(res.body['user-agent']).to.equal('foo/bar');
				done();
				end();
			}
		});

		crawler.queue({uri: `${origin}${headerPath}`,headers:{'User-Agent': 'foo/bar'}});
	});

	// A per-request userAgent option overrides the crawler-wide userAgent option.
	it('should replace the global userAgent', function(end) {
		crawler = new Crawler({
			userAgent: 'test/1.2',
			jQuery: false,
			json: true,
			callback:function(error, res, done) {
				expect(error).to.be.null;
				expect(res.body['user-agent']).to.equal('foo/bar');
				done();
				end();
			}
		});

		crawler.queue({uri: `${origin}${headerPath}`, userAgent: 'foo/bar'});
	});

	// The `referer` option is sent as the Referer header (echoed back lowercase).
	it('should spoof the referer', function(end) {
		const referer = 'http://spoofed.com';

		crawler.queue({
			uri: `${origin}${headerPath}`,
			referer: referer,
			callback:function(error, res, done) {
				expect(error).to.be.null;
				expect(res.body.referer).to.equal(referer);
				done();
				end();
			}
		});
	});
});
|