1 | var assert = require("assert");
|
2 | var _ = require("underscore");
|
3 | var memstat = require("../plugins/stat-plugin.js");
|
4 | var logger = require("../plugins/log-plugin.js");
|
5 |
|
6 | var testSite = require("./website/start.js").site;
|
7 |
|
8 | var crawler = require("../index.js");
|
9 |
|
10 |
|
11 |
|
/**
 * Test suite for the stat plugin.
 *
 * Every test follows the same scenario: initialise the crawler, register a
 * fresh stat plugin, queue a single start URL served by the local test site,
 * and check the counters exposed on the plugin's `data` object once the
 * crawler invokes the callback given to `crawler.init`.
 */
describe('Stat Plugin', function() {

    // Base address of the local test site used by every test in this suite.
    var BASE_URL = "http://localhost:9999/";

    // Content-type keys looked up in `stat.data.contentTypes`.
    var HTML_CONTENT_TYPE = 'text/html; charset=UTF-8';
    var TEXT_CONTENT_TYPE = 'text/plain; charset=UTF-8';

    /**
     * Run one crawl with a fresh stat plugin and verify the collected data.
     *
     * The call order (init, plugin creation, registration, queue) is the one
     * every test in this suite relies on.
     * NOTE(review): presumably `crawler.init` resets previously registered
     * plugins, which is why a new plugin is registered after each init -
     * confirm against ../index.js.
     *
     * @param {Object|null} options - Options passed as-is to `crawler.init`.
     * @param {string} url - URL to queue as the starting point of the crawl.
     * @param {function(Object)} verify - Receives `stat.data`; expected to
     *     throw (e.g. through `assert`) when the collected stats are wrong.
     * @param {function} done - Mocha completion callback, called only when
     *     `verify` returns without throwing.
     */
    function crawlWithStat(options, url, verify, done) {
        // Declared before the callback that reads it, so the callback does
        // not depend on `var` hoisting to see the plugin instance.
        var stat;

        crawler.init(options, function() {
            verify(stat.data);
            done();
        });

        stat = new memstat.Plugin();
        crawler.registerPlugin(stat);
        crawler.queue({url : url});
    }

    // NOTE: comparisons below deliberately stay loose (==): this suite does
    // not pin down the exact type of the plugin's counters.

    it('should return only one page stat', function(done) {
        crawlWithStat(null, BASE_URL + "index.html", function(data) {
            assert(data.numberOfUrls == 1, "Incorrect number of crawled urls : " + data.numberOfUrls);
            assert(data.contentTypes[HTML_CONTENT_TYPE] == 1, "Incorrect number of HTML content type");
            assert(data.numberOfHTMLs == 1, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

    it('should return only one page stat for an HTML page without tag', function(done) {
        crawlWithStat(null, BASE_URL + "without-tags.html", function(data) {
            assert(data.numberOfUrls == 1, "Incorrect number of crawled urls : " + data.numberOfUrls);
            // A page without tags is expected not to be counted as an HTML page.
            assert(data.numberOfHTMLs == 0, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

    it('should return only one page stat for a text page', function(done) {
        crawlWithStat(null, BASE_URL + "test.txt", function(data) {
            assert(data.numberOfUrls == 1, "Incorrect number of crawled urls : " + data.numberOfUrls);
            assert(data.contentTypes[TEXT_CONTENT_TYPE] == 1, "Incorrect number of text content type");
            assert(data.numberOfHTMLs == 0, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

    it('should return only internal pages stat', function(done) {
        crawlWithStat(null, BASE_URL + "internal-links.html", function(data) {
            assert(data.numberOfUrls == 8, "Incorrect number of crawled urls : " + data.numberOfUrls);
            assert(data.contentTypes[HTML_CONTENT_TYPE] == 6, "Incorrect number of HTML content type");
            assert(data.numberOfHTMLs == 6, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

    it('should return 1 internal pages stat for a depthLimit = 0', function(done) {
        crawlWithStat({depthLimit : 0}, BASE_URL + "internal-links.html", function(data) {
            assert(data.numberOfUrls == 1, "Incorrect number of crawled urls : " + data.numberOfUrls);
            assert(data.contentTypes[HTML_CONTENT_TYPE] == 1, "Incorrect number of HTML content type");
            assert(data.numberOfHTMLs == 1, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

    it('should return 4 internal pages stat for a depthLimit = 1', function(done) {
        crawlWithStat({depthLimit : 1}, BASE_URL + "internal-links.html", function(data) {
            assert(data.numberOfUrls == 4, "Incorrect number of crawled urls : " + data.numberOfUrls);
            assert(data.contentTypes[HTML_CONTENT_TYPE] == 4, "Incorrect number of HTML content type");
            assert(data.numberOfHTMLs == 4, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

    it('should return only dofollow pages stat', function(done) {
        var options = {
            // Only follow links that the crawler reports as dofollow.
            canCrawl : function(parentUri, link, anchor, isDoFollow) {
                return isDoFollow;
            }
        };

        crawlWithStat(options, BASE_URL + "internal-links.html", function(data) {
            assert(data.numberOfUrls == 6, "Incorrect number of crawled urls : " + data.numberOfUrls);
            assert(data.contentTypes[HTML_CONTENT_TYPE] == 5, "Incorrect number of HTML content type");
            assert(data.numberOfHTMLs == 5, "Incorrect number of crawled HTML pages : " + data.numberOfHTMLs);
        }, done);
    });

});
|