1 | var moment = require('moment');
|
2 | var async = require('async');
|
3 | var _ = require('@sailshq/lodash');
|
4 | var fs = require('fs');
|
5 | var url = require('url');
|
6 |
|
// Locale key used when a doc has no workflowLocale, and the only locale
// mapped when the apostrophe-workflow module is not present.
var defaultLocale = 'default';
|
8 |
|
9 | module.exports = {
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
// Default for the cacheLifetime option. The value is handed to
// self.cache.set as the lifetime of each cached sitemap file
// (presumably seconds, i.e. one hour — confirm against the cache API).
cacheLifetime: 60 * 60,

// Default for the piecesPerBatch option: the limit() applied to each
// query issued while fetching pieces.
piecesPerBatch: 100,

// Modules shipped in this bundle, looked up under lib/modules.
moogBundle: {
  modules: [ 'apostrophe-site-map-custom-pages', 'apostrophe-site-map-pieces' ],
  directory: 'lib/modules'
},
|
23 |
|
24 | afterConstruct: function(self) {
|
25 | self.apos.tasks.add(self.__meta.name, 'map', self.mapTask);
|
26 | self.apos.tasks.add(self.__meta.name, 'clear', self.clearTask);
|
27 | self.addRoutes();
|
28 | self.enableCache();
|
29 | },
|
30 |
|
31 | construct: function(self, options) {
|
32 |
|
// Cache generated sitemaps by default. self.mapTask resets this on every
// run according to its --update-cache argument.
self.caching = true;

// Lifetime passed to self.cache.set for each cached sitemap file.
self.cacheLifetime = options.cacheLifetime;

// Batch size (limit) for the piece queries made by self.getPieces.
self.piecesPerBatch = options.piecesPerBatch;

// Base URL of the site: the module-level option takes precedence over
// the site-wide apos.baseUrl.
self.baseUrl = options.baseUrl || self.apos.baseUrl;
|
40 |
|
// Handler for the "clear" task: empties the sitemap cache and reports the
// outcome through `callback`. `apos` and `argv` are not used.
self.clearTask = function(apos, argv, callback) {
  var cache = self.cache;
  return cache.clear(callback);
};
|
46 |
|
// Handler for the "map" task. The output is cached only when the
// --update-cache argument is given; otherwise self.caching is switched
// off for this run. A configured baseUrl is mandatory: without one the
// callback receives an Error and nothing is generated.
self.mapTask = function(apos, argv, callback) {
  self.caching = !!argv['update-cache'];

  if (self.baseUrl) {
    return self.map(callback);
  }

  var message =
    'You must specify the top-level baseUrl option when configuring Apostrophe\n' +
    'to use this task. Example: baseUrl: "http://mycompany.com"\n\n' +
    'Note there is NO TRAILING SLASH.\n\n' +
    'Usually you will only do this in data/local.js, on production.';
  return callback(new Error(message));
};
|
65 |
|
// Generate the sitemap for every public locale and write it out through
// self.writeSitemap.
//
// The whole run happens while holding the 'apostrophe-site-map' lock.
// Once the lock has been acquired it is always released again, whether
// or not generation succeeded, so a single failed run cannot leave the
// lock held. `callback` receives the first error encountered (from
// generation, else from unlocking), or null.
self.map = function(callback) {

  self.workflow = self.apos.modules['apostrophe-workflow'];

  var argv = self.apos.argv;

  if (self.caching) {
    // Filled by self.writeFile, flushed by self.writeToCache
    self.cacheOutput = [];
  }

  return lock(function(err) {
    if (err) {
      // The lock was never acquired, so there is nothing to release
      return callback(err);
    }
    return async.series([
      init,
      map,
      hreflang,
      write
    ], function(err) {
      return unlock(function(unlockErr) {
        return callback(err || unlockErr || null);
      });
    });
  });

  function lock(callback) {
    return self.apos.locks.lock('apostrophe-site-map', callback);
  }

  // Resolve per-run settings; command line arguments win over options
  // for format and indent, and extend the excluded types.
  function init(callback) {
    self.format = argv.format || options.format || 'xml';

    self.indent = (typeof (argv.indent) !== 'undefined') ? argv.indent : options.indent;

    self.excludeTypes = options.excludeTypes || [];

    if (argv['exclude-types']) {
      self.excludeTypes = self.excludeTypes.concat(argv['exclude-types'].split(','));
    }

    self.perLocale = options.perLocale || argv['per-locale'];

    // Text format is always written as a single map
    if (self.format === 'text') {
      self.perLocale = false;
    }
    return callback(null);
  }

  // Collect pages, pieces and custom entries into self.maps, one locale
  // at a time.
  function map(callback) {
    self.maps = {};
    self.today = moment().format('YYYY-MM-DD');

    var locales = [ defaultLocale ];

    if (self.workflow) {
      // Draft and private locales are left out of the sitemap
      locales = _.filter(_.keys(self.workflow.locales), function(locale) {
        return !locale.match(/-draft$/) && !self.workflow.locales[locale].private;
      });
    }

    return async.eachSeries(locales, function(locale, callback) {
      var req = self.apos.tasks.getAnonReq({ locale: locale });
      req.locale = locale;
      return async.series([
        _.partial(self.getPages, req, locale),
        _.partial(self.getPieces, req, locale),
        function(callback) {
          // An override of self.custom that declares a single parameter
          // is invoked with the callback only
          if (self.custom.length === 1) {
            return self.custom(callback);
          } else {
            return self.custom(req, locale, callback);
          }
        }
      ], callback);
    }, function(err) {
      if (err) {
        return callback(err);
      }
      return callback(null);
    });
  }

  // Cross-link entries that share a workflowGuid as xhtml:link
  // alternates, then strip the temporary workflow properties from every
  // entry.
  function hreflang(callback) {

    var alternativesByGuid = {};

    each(function(entry) {
      if (!alternativesByGuid[entry.url.workflowGuid]) {
        alternativesByGuid[entry.url.workflowGuid] = [];
      }
      alternativesByGuid[entry.url.workflowGuid].push(entry);
    });

    each(function(entry) {
      if (!self.workflow) {
        // The xhtml:link list is only created when workflow is present;
        // without it there is nothing to append alternates to
        return;
      }
      // Each entry lists itself first, then its counterparts
      entry.url['xhtml:link'] = [{
        _attributes: {
          rel: 'alternate',
          hreflang: entry.url.workflowLocale,
          href: entry.url.loc
        }
      }];
      var alternatives = alternativesByGuid[entry.url.workflowGuid];
      _.each(alternatives, function(alternative) {
        if (alternative === entry) {
          return;
        }
        entry.url['xhtml:link'].push({
          _attributes: {
            rel: 'alternate',
            hreflang: alternative.url.workflowLocale,
            href: alternative.url.loc
          }
        });
      });
    });

    each(function(entry) {
      delete entry.url.workflowLocale;
      delete entry.url.workflowGuid;
    }, true);

    return setImmediate(callback);

    // Invoke iterator for every object entry in every map. Non-object
    // entries (text format output) are skipped, as are entries without
    // a workflowGuid unless ignoreWorkflow is set.
    function each(iterator, ignoreWorkflow) {
      _.each(self.maps, function(map) {
        _.each(map, function(entry) {
          if (typeof (entry) !== 'object') {
            return;
          }

          if (!entry.url.workflowGuid && !ignoreWorkflow) {
            return;
          }
          iterator(entry);
        });
      });
    }

  }

  function write(callback) {
    return self.writeSitemap(callback);
  }

  function unlock(callback) {
    return self.apos.locks.unlock('apostrophe-site-map', callback);
  }
};
|
212 |
|
// Fetch all pages visible to `req`, without areas or joins, ordered by
// level and then rank, and pass each one to self.output. `locale` is not
// used here. Query errors are reported through `callback`.
self.getPages = function(req, locale, callback) {
  var cursor = self.apos.pages.find(req)
    .areas(false)
    .joins(false)
    .sort({ level: 1, rank: 1 });

  return cursor.toArray(function(err, pages) {
    if (!err) {
      _.each(pages, self.output);
      return callback(null);
    }
    return callback(err);
  });
};
|
222 |
|
// Output every piece that has a _url, for every module whose chain
// includes apostrophe-pieces, skipping modules whose name is listed in
// self.excludeTypes. Pieces are fetched self.piecesPerBatch at a time via
// self.findPieces. `locale` is not used here.
//
// A failed query is reported through `callback` rather than being
// ignored.
self.getPieces = function(req, locale, callback) {
  var modules = _.filter(self.apos.modules, function(module) {
    return _.find(module.__meta.chain, function(entry) {
      return entry.name === 'apostrophe-pieces';
    });
  });
  return async.eachSeries(modules, function(module, callback) {
    if (_.includes(self.excludeTypes, module.name)) {
      return setImmediate(callback);
    }

    var done = false;
    var skip = 0;
    return async.whilst(
      function() { return !done; },
      function(callback) {
        return self.findPieces(req, module).skip(skip).limit(self.piecesPerBatch).toArray(function(err, pieces) {
          if (err) {
            // Without this check `pieces` is undefined and the length
            // test below would throw instead of reporting the failure
            return callback(err);
          }
          _.each(pieces, function(piece) {
            if (!piece._url) {
              // Nothing to link to
              return;
            }

            // Pieces start at level 3, which self.output turns into a
            // priority unless siteMapPriority is set
            piece.level = 3;

            // Dated pieces: a start date after today lowers the level
            // (higher priority), otherwise the level is raised
            if (piece.startDate) {
              if (piece.startDate > self.today) {
                piece.level--;
              } else {
                piece.level++;
              }
            }
            self.output(piece);
          });
          if (!pieces.length) {
            // An empty batch means the module is exhausted
            done = true;
          } else {
            skip += pieces.length;
          }
          return callback(null);
        });
      }, callback);
  }, callback);
};
|
270 |
|
// Write out everything collected in self.maps.
//
// Without self.perLocale, all locales are joined into a single map
// written to self.file: 'sitemap.xml' when caching, otherwise the --file
// argument or /dev/stdout. With self.perLocale, one file per locale is
// written under sitemaps/, plus an index.
//
// When self.caching is set the collected output is then flushed with
// self.writeToCache. The writers are synchronous and may throw (for
// example self.writeIndex when baseUrl is missing); such exceptions are
// delivered to `callback` rather than escaping the callback chain.
self.writeSitemap = function(callback) {
  try {
    if (!self.perLocale) {
      // Single sitemap for all locales
      self.file = self.caching ? 'sitemap.xml' : (self.apos.argv.file || '/dev/stdout');
      var map = _.map(_.keys(self.maps), function(locale) {
        return _.map(self.maps[locale], self.stringify).join('\n');
      }).join('\n');
      self.writeMap(self.file, map);
    } else {
      // One sitemap per locale, plus an index listing them
      self.ensureDir('sitemaps');
      _.each(self.maps, function(map, key) {
        var extension = (self.format === 'xml') ? 'xml' : 'txt';
        map = _.map(map, self.stringify).join('\n');
        self.writeMap('sitemaps/' + key + '.' + extension, map);
      });
      self.writeIndex();
    }
  } catch (e) {
    return callback(e);
  }
  // Kept outside the try block so that an exception thrown by the
  // callback itself can never cause it to be invoked a second time
  if (self.caching) {
    return self.writeToCache(callback);
  }
  return callback(null);
};
|
296 |
|
297 |
|
298 |
|
299 |
|
300 |
|
301 |
|
302 |
|
303 |
|
304 |
|
305 |
|
306 |
|
307 |
|
308 |
|
309 |
|
// Turn one sitemap entry into a string.
//
// - An array, when the format is not xml, is joined with no separator.
// - Any other non-object is returned as-is, or escaped with
//   apos.utils.escapeHtml when the format is xml.
// - An object becomes XML: each key is emitted as an element wrapping
//   the stringified value, an array value repeats the element once per
//   item, and a value's `_attributes` object supplies the element's
//   attributes (escaped).
//
// A numeric 0 (for example a priority of 0) is emitted as "0"; other
// falsy values still produce an empty element.
self.stringify = function(value) {
  if (Array.isArray(value) && (self.format !== 'xml')) {
    return value.join('');
  }
  if (typeof (value) !== 'object') {
    if (self.format === 'xml') {
      return self.apos.utils.escapeHtml(value);
    }
    return value;
  }
  var xml = '';
  _.each(value, function(v, k) {
    if (k === '_attributes') {
      // Already rendered on the element that carries them
      return;
    }
    if (Array.isArray(v)) {
      _.each(v, function(el) {
        element(k, el);
      });
    } else {
      element(k, v);
    }
  });
  return xml;

  // Append a single <k ...attributes>content</k> element to xml
  function element(k, v) {
    xml += '<' + k;
    if (v && v._attributes) {
      _.each(v._attributes, function(av, a) {
        xml += ' ' + a + '="' + self.apos.utils.escapeHtml(av) + '"';
      });
    }
    xml += '>';
    // `v || ''` alone would silently drop a legitimate zero
    xml += self.stringify((v === 0) ? v : (v || ''));
    xml += '</' + k + '>\n';
  }
};
|
346 |
|
// Make sure `dir` exists beneath the site's public/ folder. Does nothing
// when output goes to the cache rather than to disk.
//
// A directory that already exists is fine. Any other failure is
// rethrown, since the writes that follow could not succeed either and
// would only report a less specific error.
self.ensureDir = function(dir) {
  if (self.caching) {
    return;
  }
  dir = self.apos.rootDir + '/public/' + dir;
  try {
    fs.mkdirSync(dir);
  } catch (e) {
    if (e.code !== 'EEXIST') {
      throw e;
    }
  }
};
|
357 |
|
// Write sitemaps/index.xml: a <sitemapindex> with one <sitemap> element
// per key of self.maps, each pointing at
// baseUrl + prefix + /sitemaps/<key>.xml and stamped with the current
// time. Throws if no baseUrl is configured.
self.writeIndex = function() {
  if (!self.baseUrl) {
    throw new Error(
      'You must specify the top-level baseUrl option when configuring Apostrophe\n' +
      'to use sitemap indexes. Example: baseUrl: "http://mycompany.com"\n\n' +
      'Note there is NO TRAILING SLASH.\n\n' +
      'Usually you will override this in data/local.js, on production.'
    );
  }

  var lastmod = new Date().toISOString();

  var entries = _.map(_.keys(self.maps), function(key) {
    return ' <sitemap>\n' +
      ' <loc>' + self.baseUrl + self.apos.prefix + '/sitemaps/' + key + '.xml' +
      '</loc>\n' +
      ' <lastmod>' + lastmod + '</lastmod>\n' +
      ' </sitemap>\n';
  });

  var header = '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' +
    ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n';

  self.writeFile('sitemaps/index.xml', header + entries.join('') + '</sitemapindex>\n');
};
|
386 |
|
// Write one map to `file`: wrapped in the XML envelope when the format
// is xml, verbatim otherwise.
self.writeMap = function(file, map) {
  if (self.format !== 'xml') {
    self.writeFile(file, map);
    return;
  }
  self.writeXmlMap(file, map);
};
|
394 |
|
// Wrap `map` (already stringified entries) in the XML declaration and a
// <urlset> element, then hand the document to self.writeFile.
self.writeXmlMap = function(file, map) {
  var header = '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' +
    ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n';
  var footer = '</urlset>\n';

  self.writeFile(file, header + map + footer);
};
|
404 |
|
405 | self.writeFile = function(filename, s) {
|
406 | if (!self.caching) {
|
407 | filename = require('path').resolve(self.apos.rootDir + '/public', filename);
|
408 | if (filename === '/dev/stdout') {
|
409 |
|
410 | fs.writeSync(1, s);
|
411 | } else {
|
412 | fs.writeFileSync(filename, s);
|
413 | }
|
414 | } else {
|
415 | self.cacheOutput.push({
|
416 | filename: filename,
|
417 | data: s,
|
418 | createdAt: new Date()
|
419 | });
|
420 | }
|
421 | };
|
422 |
|
// Replace the cached sitemap: clear the cache, then store every queued
// document from self.cacheOutput under its filename with
// self.cacheLifetime, one at a time.
self.writeToCache = function(callback) {
  function clearCache(next) {
    return self.cache.clear(next);
  }

  function storeAll(next) {
    return async.eachSeries(self.cacheOutput, function(entry, done) {
      return self.cache.set(entry.filename, entry, self.cacheLifetime, done);
    }, next);
  }

  return async.series([ clearCache, storeAll ], callback);
};
|
439 |
|
440 |
|
441 |
|
442 |
|
// Hook invoked once per locale by self.map after pages and pieces have
// been gathered; the default does nothing and reports success
// (presumably meant to be overridden to emit additional entries —
// confirm against the module's documentation).
//
// The declared parameter count matters: self.map inspects
// self.custom.length and calls an override that declares exactly one
// parameter as custom(callback) rather than custom(req, locale, callback).
self.custom = function(req, locale, callback) {
  return callback(null);
};
|
446 |
|
// Build the cursor used to fetch a piece module's docs for the sitemap:
// published only, with joins and areas switched off.
self.findPieces = function(req, module) {
  var published = module.find(req).published(true);
  return published.joins(false).areas(false);
};
|
450 |
|
451 |
|
452 |
|
453 |
|
454 |
|
455 |
|
456 |
|
457 |
|
// Add one page or piece to the map of its locale (its workflowLocale, or
// the default locale).
//
// Nothing is written when workflow is active and the locale is unknown
// or private, or when the doc's type is listed in self.excludeTypes.
//
// Text format writes the URL followed by a newline, preceded by one
// indent string per level when self.indent is set. Otherwise a url
// object is written, with a priority derived from the level (1.0 minus a
// tenth per level, 0.1 from level 10 on, or when the level is not a
// number below 10) unless the doc carries a numeric siteMapPriority.
self.output = function(page) {
  var locale = page.workflowLocale || defaultLocale;
  if (self.workflow) {
    if (!self.workflow.locales[locale]) {
      return;
    }
    if (self.workflow.locales[locale].private) {
      return;
    }
  }

  if (_.includes(self.excludeTypes, page.type)) {
    return;
  }

  if (self.format === 'text') {
    if (self.indent) {
      var i;

      for (i = 0; (i < page.level); i++) {
        self.write(locale, ' ');
      }
    }
    // The URL is written whether or not indentation was requested;
    // indentation only adds the leading whitespace above
    self.write(locale, page._url + '\n');
    return;
  }

  var priority = (page.level < 10) ? (1.0 - page.level / 10) : 0.1;

  if (typeof (page.siteMapPriority) === 'number') {
    priority = page.siteMapPriority;
  }

  self.write(locale, {
    url: {
      priority: priority,
      changefreq: 'daily',
      loc: page._url,
      // Temporary: consumed and then removed by the hreflang pass of
      // self.map
      workflowGuid: page.workflowGuid,
      workflowLocale: locale
    }
  });
};
|
503 |
|
504 |
|
505 |
|
506 |
|
// Append `s` (a string or an entry object) to the map for `locale`,
// creating that map on first use.
self.write = function(locale, s) {
  if (!self.maps[locale]) {
    self.maps[locale] = [];
  }
  self.maps[locale].push(s);
};
|
511 |
|
// Register the GET routes that serve the cached output: /sitemap.xml and
// everything beneath /sitemaps/.
self.addRoutes = function() {
  var app = self.apos.app;

  app.get('/sitemap.xml', function(req, res) {
    return self.sendCache(res, 'sitemap.xml');
  });

  app.get('/sitemaps/*', function(req, res) {
    var cachePath = 'sitemaps/' + req.params[0];
    return self.sendCache(res, cachePath);
  });
};
|
521 |
|
// Respond with the cached file stored under `path`, served as text/xml.
//
// On a miss, the cache is probed for the main sitemap file
// (sitemaps/index.xml when self.perLocale is set, sitemap.xml
// otherwise). If that exists, the sitemap has been generated and `path`
// simply is not part of it: 404. If not, the sitemap is generated via
// self.cacheAndRetry. Cache errors are logged and answered with a 500.
self.sendCache = function(res, path) {
  return self.cache.get(path, function(err, file) {
    if (err) {
      return fail(err);
    }
    if (file) {
      return res.contentType('text/xml').send(file.data);
    }

    var sentinel = self.perLocale ? 'sitemaps/index.xml' : 'sitemap.xml';
    return self.cache.get(sentinel, function(err, exists) {
      if (err) {
        return fail(err);
      }
      if (!exists) {
        return self.cacheAndRetry(res, path);
      }
      return res.status(404).send('not found');
    });
  });

  function fail(err) {
    console.error(err);
    return res.status(500).send('error');
  }
};
|
559 |
|
// Generate the sitemap via self.map, then attempt to serve `path` from
// the cache again. A generation error is logged and answered with a 500.
self.cacheAndRetry = function(res, path) {
  return self.map(function(err) {
    if (!err) {
      return self.sendCache(res, path);
    }
    console.error(err);
    return res.status(500).send('error');
  });
};
|
572 |
|
// Attach the 'apostrophe-sitemap' cache as self.cache.
self.enableCache = function() {
  var caches = self.apos.caches;
  self.cache = caches.get('apostrophe-sitemap');
};
|
576 | }
|
577 | };
|