UNPKG

12.5 kBJavaScriptView Raw
/*
 * grunt-check-pages
 * https://github.com/DavidAnson/grunt-check-pages
 *
 * Copyright (c) 2014-2015 David Anson
 * Licensed under the MIT license.
 */
8
9'use strict';
10
11module.exports = function(grunt) {
12 // Imports
13 var cheerio = require('cheerio');
14 var crchash = require('crc-hash');
15 var crypto = require('crypto');
16 var request = require('request');
17 var sax = require('sax');
18 var url = require('url');
19
20 // Global variables
21 var userAgent = 'grunt-check-pages/' + require('../package.json').version;
22 var pendingCallbacks = [];
23 var issues = [];
24
// Reports a problem found on a page: writes the message to Grunt's error log
// and appends a [page, message] pair to the shared issues list
//   page: URL of the page the issue belongs to
//   message: text describing the issue
function logPageError(page, message) {
  var issue = [page, message];
  grunt.log.error(issue[1]);
  issues.push(issue);
}
30
// Reports whether a link appears (by exact, strict equality) in options.linksToIgnore
//   link: the resolved link to look up
//   options: task options; linksToIgnore must be an array
// Returns true when the link is listed, false otherwise
function isLinkIgnored(link, options) {
  var ignoredLinks = options.linksToIgnore;
  return ignoredLinks.indexOf(link) !== -1;
}
37
// Returns a callback to test the specified link
//   page: URL of the page the link was found on; issues are recorded against it
//   link: absolute URL to request
//   options: normalized task options (queryHashes, noRedirects and noLocalLinks are read here)
//   retryWithGet: truthy to issue a GET request instead of the default HEAD
// The returned function takes a single callback which is invoked (without
// arguments) once the link has been checked
function testLink(page, link, options, retryWithGet) {
  return function (callback) {
    var logError = logPageError.bind(null, page);
    var start = Date.now();
    var hash = null;
    var linkHash = null;
    if (options.queryHashes) {
      // Create the hash algorithm named by the link's query string (sha1, md5, or crc32)
      var query = url.parse(link, true).query;
      if (query.sha1) {
        linkHash = query.sha1;
        hash = crypto.createHash('sha1');
      } else if (query.md5) {
        linkHash = query.md5;
        hash = crypto.createHash('md5');
      } else if (query.crc32) {
        linkHash = query.crc32;
        hash = crchash.createHash('crc32');
      }
    }
    var res;
    // Hash verification needs the response body, so queryHashes always uses GET
    var useGetRequest = retryWithGet || options.queryHashes;
    var req = request(link, {
      method: useGetRequest ? 'GET' : 'HEAD',
      followRedirect: !options.noRedirects
    })
      .on('error', function(err) {
        logError('Link error (' + err.message + '): ' + link + ' (' + (Date.now() - start) + 'ms)');
        req.abort();
        callback();
      })
      .on('response', function(response) {
        // Capture response object for use during 'end'
        res = response;
      })
      .on('end', function() {
        var elapsed = Date.now() - start;
        var status = res.statusCode;
        if ((200 <= status) && (status < 300)) {
          grunt.log.ok('Link: ' + link + ' (' + elapsed + 'ms)');
          if (hash) {
            hash.end();
            var contentHash = hash.read();
            if (linkHash.toUpperCase() === contentHash.toUpperCase()) {
              grunt.log.ok('Hash: ' + link);
            } else {
              logError('Hash error (' + contentHash.toLowerCase() + '): ' + link);
            }
          }
        } else if (useGetRequest) {
          // Only a 3xx status is a redirect; any other failure (e.g. 404) is a bad
          // link even when noRedirects is set
          var isRedirect = (300 <= status) && (status < 400);
          if (isRedirect && options.noRedirects) {
            logError('Redirected link (' + status + '): ' + link + ' -> ' + (res.headers.location || '[Missing Location header]') + ' (' + elapsed + 'ms)');
          } else {
            logError('Bad link (' + status + '): ' + link + ' (' + elapsed + 'ms)');
          }
        } else {
          // Retry HEAD request as GET to be sure
          testLink(page, link, options, true)(callback);
          return;
        }
        callback();
      });
    if (hash) {
      // Pipe content to hash algorithm
      hash.setEncoding('hex');
      req.pipe(hash);
    }
    if (options.noLocalLinks) {
      // Match the whole host (localhost, 127.x.x.x, or a bracketed IPv6 loopback)
      // with an optional port; the alternatives are grouped so both anchors apply
      // to each of them and names like "localhost.example.com" are not flagged
      var localhost = /^(?:localhost|127\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?|\[[0:]*:[0:]*:0?0?0?1\])(?::\d+)?$/i;
      if (localhost.test(req.uri.host)) {
        logError('Local link: ' + link);
      }
    }
  };
}
113
// Queues a link test for every <element attribute='...'/> found in the document
//   $: document wrapper, called as $(selector).each(...) and $(node).attr(name)
//   element: selector of the elements to scan
//   attribute: name of the attribute holding the link
//   page: URL of the page; relative links are resolved against it
//   options: task options (onlySameDomain and linksToIgnore are consulted)
//   index: position in pendingCallbacks at which to insert the first test
// Returns the position following the last test that was inserted
function addLinks($, element, attribute, page, options, index) {
  var insertAt = index;
  var pageHost = url.parse(page).hostname;
  $(element).each(function() {
    var rawLink = $(this).attr(attribute);
    if (!rawLink) {
      return;
    }
    var absoluteLink = url.resolve(page, rawLink);
    var target = url.parse(absoluteLink);
    var isWebLink = (target.protocol === 'http:') || (target.protocol === 'https:');
    if (!isWebLink) {
      return;
    }
    if (options.onlySameDomain && (target.hostname !== pageHost)) {
      return;
    }
    if (isLinkIgnored(absoluteLink, options)) {
      return;
    }
    // Insert at the front of the queue (keeping document order) so the links
    // get processed before the next page
    pendingCallbacks.splice(insertAt, 0, testLink(page, absoluteLink, options));
    insertAt++;
  });
  return insertAt;
}
133
// Returns a callback to test the specified page
//   page: URL of the page to fetch
//   options: normalized task options (checkLinks, checkXhtml, maxResponseTime,
//            checkCaching and checkCompression are read here)
// The returned function fetches the page with GET, logs the outcome, runs the
// enabled checks on a 2xx response, and finally invokes its callback argument
function testPage(page, options) {
  return function (callback) {
    var logError = logPageError.bind(null, page);
    var start = Date.now();
    request.get(page, function(err, res, body) {
      var elapsed = Date.now() - start;
      var timing = ' (' + elapsed + 'ms)';

      // Failures end the test for this page immediately
      if (err) {
        logError('Page error (' + err.message + '): ' + page + timing);
        callback();
        return;
      }
      if ((res.statusCode < 200) || (300 <= res.statusCode)) {
        logError('Bad page (' + res.statusCode + '): ' + page + timing);
        callback();
        return;
      }

      var finalUrl = res.request.href;
      if (page === finalUrl) {
        grunt.log.ok('Page: ' + page + timing);
      } else {
        grunt.log.ok('Page: ' + page + ' -> ' + finalUrl + timing);
        // Update page to account for redirects
        page = finalUrl;
      }

      if (options.checkLinks) {
        // Check the page's links for validity (i.e., HTTP HEAD returns OK)
        var $ = cheerio.load(body);
        var linkSources = [
          ['a', 'href'],
          ['area', 'href'],
          ['audio', 'src'],
          ['embed', 'src'],
          ['iframe', 'src'],
          ['img', 'src'],
          ['input', 'src'],
          ['link', 'href'],
          ['object', 'data'],
          ['script', 'src'],
          ['source', 'src'],
          ['track', 'src'],
          ['video', 'src']
        ];
        var index = 0;
        linkSources.forEach(function(source) {
          index = addLinks($, source[0], source[1], page, options, index);
        });
      }

      if (options.checkXhtml) {
        // Check the page's structure for XHTML compliance
        var parser = sax.parser(true);
        parser.onerror = function(error) {
          logError(error.message.replace(/\n/g, ', '));
        };
        parser.write(body);
      }

      // Check the page's response time
      if (options.maxResponseTime && (options.maxResponseTime < elapsed)) {
        logError('Page response took more than ' + options.maxResponseTime + 'ms to complete');
      }

      if (options.checkCaching) {
        // Check the page's cache headers
        var cacheControl = res.headers['cache-control'];
        if (!cacheControl) {
          logError('Missing Cache-Control header in response');
        } else if (!/max-age|max-stale|min-fresh|must-revalidate|no-cache|no-store|no-transform|only-if-cached|private|proxy-revalidate|public|s-maxage/.test(cacheControl)) {
          logError('Invalid Cache-Control header in response: ' + cacheControl);
        }
        var etag = res.headers.etag;
        if (!etag) {
          // Don't require ETag for responses that won't be cached
          if (!cacheControl || !/no-cache|max-age=0/.test(cacheControl)) {
            logError('Missing ETag header in response');
          }
        } else if (!/^(W\/)?\"[^\"]*\"$/.test(etag)) {
          logError('Invalid ETag header in response: ' + etag);
        }
      }

      if (options.checkCompression) {
        // Check that the page was compressed
        var contentEncoding = res.headers['content-encoding'];
        if (!contentEncoding) {
          logError('Missing Content-Encoding header in response');
        } else if (!/^(deflate|gzip)$/.test(contentEncoding)) {
          logError('Invalid Content-Encoding header in response: ' + contentEncoding);
        }
      }

      callback();
    });
  };
}
220
// Register the task with Grunt
//
// Snapshot the unconfigured request function and the default User-Agent once, so
// that every run of the task (e.g. several targets in one Grunt invocation) starts
// from the same baseline instead of layering its settings on top of whatever a
// previously-run target configured
var baseRequest = request;
var defaultUserAgent = userAgent;
grunt.registerMultiTask('checkPages', 'Checks various aspects of a web page for correctness.', function() {
  // Discard state left behind by a previously-run target so its issues, queue
  // entries and User-Agent override do not leak into this run
  issues.length = 0;
  pendingCallbacks.length = 0;
  userAgent = defaultUserAgent;

  // Check for unsupported use
  if (this.files.length) {
    grunt.fail.warn('checkPages task does not use files; remove the files parameter');
  }

  // Check for required options
  var options = this.options();
  if (!options.pageUrls) {
    grunt.fail.warn('pageUrls option is not present; it should be an array of URLs');
  } else if (!Array.isArray(options.pageUrls)) {
    grunt.fail.warn('pageUrls option is invalid; it should be an array of URLs');
  }

  // Check for and normalize optional options
  options.checkLinks = !!options.checkLinks;
  options.onlySameDomain = !!options.onlySameDomain;
  options.noRedirects = !!options.noRedirects;
  options.noLocalLinks = !!options.noLocalLinks;
  options.queryHashes = !!options.queryHashes;
  options.linksToIgnore = options.linksToIgnore || [];
  if (!Array.isArray(options.linksToIgnore)) {
    grunt.fail.warn('linksToIgnore option is invalid; it should be an array');
  }
  options.checkXhtml = !!options.checkXhtml;
  options.checkCaching = !!options.checkCaching;
  options.checkCompression = !!options.checkCompression;
  if (options.maxResponseTime && (typeof (options.maxResponseTime) !== 'number' || (options.maxResponseTime <= 0))) {
    grunt.fail.warn('maxResponseTime option is invalid; it should be a positive number');
  }
  if (options.userAgent !== undefined) {
    if (options.userAgent) {
      if (typeof (options.userAgent) === 'string') {
        userAgent = options.userAgent;
      } else {
        grunt.fail.warn('userAgent option is invalid; it should be a string or null');
      }
    } else {
      // A falsy (but defined) userAgent option disables the User-Agent header
      userAgent = null;
    }
  }
  options.summary = !!options.summary;

  // Set request defaults
  var defaults = {
    gzip: true,
    headers: {
      // Prevent caching so response time will be accurate
      'Cache-Control': 'no-cache',
      'Pragma': 'no-cache'
    }
  };
  if (userAgent) {
    defaults.headers['User-Agent'] = userAgent;
  }
  // Always derive from the baseline; wrapping the previous wrapper would keep
  // an earlier target's headers alive
  request = baseRequest.defaults(defaults);

  // Queue callbacks for each page
  options.pageUrls.forEach(function(page) {
    pendingCallbacks.push(testPage(page, options));
  });

  // Queue 'done' callback
  var done = this.async();
  pendingCallbacks.push(function() {
    var issueCount = issues.length;
    if (issueCount) {
      if (options.summary) {
        // Group consecutive issues under the page they were recorded against
        var summary = 'Summary of issues:\n';
        var currentPage;
        issues.forEach(function(issue) {
          var page = issue[0];
          var message = issue[1];
          if (currentPage !== page) {
            summary += ' ' + page + '\n';
            currentPage = page;
          }
          summary += ' ' + message + '\n';
        });
        grunt.log.error(summary);
      }
      var warning = issueCount + ' issue' + (issueCount > 1 ? 's' : '') + ', see above.';
      if (!options.summary) {
        warning += ' (Set options.summary for a summary.)';
      }
      grunt.fail.warn(warning);
    }
    done();
  });

  // Process the queue; each callback calls next when it finishes, and the final
  // ('done') callback ends the chain by not calling it
  function next() {
    var callback = pendingCallbacks.shift();
    callback(next);
  }
  next();
});
319};