1 | "use strict";
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | const crypto = require("crypto");
|
4 | const fs = require("fs");
|
5 | const fs_extra_1 = require("fs-extra");
|
6 | const moment = require("moment");
|
7 | const request = require("request");
|
8 | const Debug = require("debug");
|
9 | const UrlParser = require("url");
|
10 | const debug = Debug('bablic:seo');
|
11 | const zlib = require('zlib');
|
12 | const common_1 = require("./common");
|
13 | const http_1 = require("http");
|
14 | const _ = require("lodash");
|
/**
 * Middleware factory that serves pre-rendered, translated HTML.
 *
 * For requests accepted by `shouldHandle` / `shouldReplaceUrls` it buffers the
 * downstream response, posts the HTML to the render server (`SEO_ROOT`) and
 * flushes the returned body instead. Rendered bodies are cached on disk under
 * `<cacheDir>/<locale>/<md5(url)>`.
 */
class SeoMiddleware {
    /**
     * @param {string} siteId - sent to the render server as the `site` query parameter.
     * @param {object} options - reads `cacheDir`, `useCache`, `cacheDays` and `altHost`.
     * @param {object} [subDirOptions] - reads `subDir`, `subDirBase`, `subDirOptional`;
     *   `returnFull` defaults to true.
     */
    constructor(siteId, options, subDirOptions) {
        this.siteId = siteId;
        this.options = options;
        this.subDirOptions = Object.assign({ returnFull: true }, subDirOptions);
    }

    /**
     * Stores a rendered body in the disk cache, creating the locale directory
     * on demand when the first write attempt fails.
     *
     * @param {string} url - page URL, hashed to form the file name.
     * @param {string} locale - locale sub-directory name.
     * @param {Buffer|string} translated - body to persist.
     * @returns {Promise<void>}
     */
    async writeToCache(url, locale, translated) {
        let cachePath = fullPathFromUrl(url, locale, this.options.cacheDir);
        try {
            await fs_extra_1.writeFile(cachePath, translated);
        }
        catch (e) {
            // Most likely the locale directory does not exist yet: create it and retry once.
            const cacheDir = getCacheDir(locale, this.options.cacheDir);
            debug("create cache dir", cacheDir);
            await fs_extra_1.ensureDir(cacheDir);
            debug("created");
            await fs_extra_1.writeFile(cachePath, translated);
        }
    }

    /**
     * Posts the original HTML to the render server and resolves with the raw
     * response body. On success the body is also written to the cache in the
     * background; cache write failures are only logged.
     *
     * @param {string} url - public URL of the page.
     * @param {string} locale - target locale (`el` query parameter).
     * @param {string} html - original HTML, sent as JSON `{ html }`.
     * @returns {Promise<Buffer>} rejects on transport errors, non-2xx statuses,
     *   an empty body, or when the render server is flagged unhealthy.
     */
    getHtml(url, locale, html) {
        if (!isRenderHealthy) {
            return Promise.reject(new Error("Render is not health"));
        }
        debug('getting from bablic', url, 'html:', !!html);
        let ld = '';
        if (this.subDirOptions.subDir) {
            ld = '&ld=subdir';
            if (this.subDirOptions.subDirBase)
                ld += '&sdb=' + encodeURIComponent(this.subDirOptions.subDirBase);
            if (this.subDirOptions.subDirOptional)
                ld += '&sdo=true';
        }
        return new Promise((resolve, reject) => {
            request({
                url: SEO_ROOT + "?site=" + this.siteId + "&el=" + locale + "&url=" + (encodeURIComponent(url)) + ld,
                headers: {
                    "Accept-Encoding": "gzip,deflate"
                },
                method: 'POST',
                json: {
                    html: html
                },
                timeout: 20000,
                // encoding: null keeps the body as a raw Buffer.
                encoding: null,
            }, (error, response, body) => {
                if (error)
                    return reject(error);
                if (response.statusCode < 200 || response.statusCode >= 300)
                    return reject(new Error("Status-" + response.statusCode));
                if (body == null)
                    return reject(new Error('empty response'));
                debug('received translated html', response.statusCode);
                resolve(body);
                this.writeToCache(url, locale, body).catch((e) => {
                    debug("error writing to cache", e);
                });
            });
        });
    }

    /**
     * Looks up a cached body.
     *
     * @param {string} url - page URL.
     * @param {string} locale - locale sub-directory name.
     * @param {boolean} skip - when truthy the cache is bypassed.
     * @param {function(Error=, Buffer=, boolean=)} callback - called with no
     *   arguments when the cache is disabled or skipped, with an error when the
     *   entry cannot be read, otherwise with `(null, data, isStillValid)`.
     */
    getFromCache(url, locale, skip, callback) {
        if (!this.options.useCache || skip)
            return callback();
        let file_path = fullPathFromUrl(url, locale, this.options.cacheDir);
        fs.stat(file_path, (error, file_stats) => {
            if (error)
                return callback(error);
            fs.readFile(file_path, (error, data) => {
                if (error)
                    return callback(error);
                callback(error, data, cacheValid(file_stats, this.options.cacheDays || 1));
            });
        });
    }

    /**
     * Tells whether a body starts with the gzip magic bytes (0x1f 0x8b).
     *
     * @param {Buffer} buffer
     * @returns {boolean} false for anything that cannot be indexed.
     */
    isEncoded(buffer) {
        try {
            let firstByte = buffer[0];
            let secondByte = buffer[1];
            return (firstByte == 0x1f) && (secondByte == 0x8b);
        }
        catch (err) {
            return false;
        }
    }

    /**
     * Reads a response header and normalises it to a string.
     *
     * @param {object} res - object exposing `getHeader(name)`.
     * @param {string} headerName
     * @returns {string} "" when the header is missing; the first entry when the
     *   header is an array; otherwise the value coerced to a string.
     */
    readHeaderAsString(res, headerName) {
        let value = res.getHeader(headerName);
        if (!value)
            return "";
        if (Array.isArray(value)) {
            value = value[0];
        }
        if (typeof (value) !== "string") {
            return value + "";
        }
        else {
            return value;
        }
    }

    /**
     * Concatenates buffered response chunks into one string.
     *
     * Adjacent Buffer chunks are merged before decoding so that a multi-byte
     * UTF-8 character split across two writes is decoded correctly.
     *
     * @param {Array<Buffer|string>} chunks
     * @returns {string}
     */
    joinChunks(chunks) {
        let text = '';
        let pending = [];
        const flush = () => {
            if (pending.length) {
                text += Buffer.concat(pending).toString();
                pending = [];
            }
        };
        for (const chunk of chunks) {
            if (Buffer.isBuffer(chunk)) {
                pending.push(chunk);
            }
            else {
                flush();
                if (chunk != null)
                    text += chunk;
            }
        }
        flush();
        return text;
    }

    /**
     * Deletes the whole cache directory, including its contents.
     *
     * @returns {Promise<void>}
     */
    async purgeCache() {
        debug("purge cache", this.options.cacheDir);
        // `rmdir` fails with ENOTEMPTY as soon as anything was cached;
        // fs-extra's `remove` deletes the directory together with its contents.
        await fs_extra_1.remove(this.options.cacheDir);
        debug("purge done");
    }

    /**
     * Builds the request handler.
     *
     * @returns {function} handler taking
     *   `(meta, lastModified, keywordsByLocale, reverseKeywordByLocale, req, res, next)`.
     *   It expects `req.bablic.locale` to be set by an earlier step.
     */
    middleware() {
        return (meta, lastModified, keywordsByLocale, reverseKeywordByLocale, req, res, next) => {
            let replaceUrls = shouldReplaceUrls(req);
            if (!shouldHandle(req) && !replaceUrls) {
                debug('ignored', req.url);
                return next();
            }
            let acceptGZIP = (req.headers['accept-encoding'] || '').indexOf('gzip') > -1;
            // Downstream handlers must produce an uncompressed body so it can be buffered as text.
            delete req.headers['accept-encoding'];
            req.bablic.proxied = true;
            let protocol = req.headers['x-forwarded-proto'] || 'http';
            let my_url = protocol + "://" + req.headers.host + req.originalUrl;
            if (this.options.altHost)
                my_url = "http://" + this.options.altHost + req.originalUrl;
            this.getFromCache(my_url, req.bablic.locale, replaceUrls, (e, html, isValid) => {
                // cache_only: a stale cache entry was already sent; the page is
                // re-rendered in the background only to refresh the cache.
                let cache_only = false;
                if (html) {
                    const encoded = this.isEncoded(html);
                    let servable = true;
                    if (encoded && !acceptGZIP) {
                        try {
                            html = zlib.gunzipSync(html);
                        }
                        catch (err) {
                            // A truncated or corrupt entry must not throw inside this
                            // fs callback (that would be an uncaught exception).
                            // Treat it as a cache miss instead.
                            debug('corrupt cache entry, ignoring', err);
                            servable = false;
                        }
                    }
                    if (servable) {
                        debug('flushing from cache');
                        res.setHeader('Content-Type', 'text/html; charset=utf-8');
                        res.setHeader('Content-Language', req.bablic.locale);
                        if (encoded && acceptGZIP) {
                            res.setHeader('Content-Encoding', 'gzip');
                        }
                        res.write(html);
                        res.end();
                        if (isValid)
                            return;
                        cache_only = true;
                    }
                }
                if (!isRenderHealthy && !replaceUrls) {
                    debug('render not healthy, skipping');
                    return next();
                }
                debug('overriding response');
                let _end = res.end;
                let _write = res.write;
                let _writeHead = res.writeHead;
                res.writeHead = (status, reason, _headers) => {
                    // Node's signature is writeHead(statusCode[, statusMessage][, headers]).
                    if (typeof reason === 'string') {
                        res.statusMessage = reason;
                    }
                    else {
                        _headers = reason;
                    }
                    res.statusCode = status;
                    if (_headers && typeof _headers === 'object') {
                        let results = [];
                        for (let key in _headers)
                            results.push(res.setHeader(key, _headers[key]));
                        return results;
                    }
                };
                let headers = {};
                let _getHeader;
                if (cache_only) {
                    // The real response is already finished: shadow the header API so
                    // downstream handlers can keep "writing" without touching it.
                    _getHeader = res.getHeader;
                    res.finished = false;
                    Object.defineProperty(res, "headersSent", {
                        get: () => {
                            return false;
                        },
                        configurable: true,
                        enumerable: true,
                    });
                    res.setHeader = (name, value) => headers[name.toLowerCase().trim()] = value;
                    res.removeHeader = name => headers[name.toLowerCase().trim()] = null;
                    res.getHeader = name => {
                        let local = headers[name.toLowerCase().trim()];
                        if (local)
                            return local;
                        if (local === null)
                            return;
                        return _getHeader.call(res, name);
                    };
                }
                let restore_override = () => {
                    if (!_write || !_end || !_writeHead)
                        return;
                    debug('undo override');
                    res.write = _write;
                    res.end = _end;
                    res.writeHead = _writeHead;
                    if (cache_only) {
                        // `_getHeader` is intentionally kept: the shadow `res.getHeader`
                        // stays installed and still falls back to it.
                        const getter = Object.getOwnPropertyDescriptor(http_1.OutgoingMessage.prototype, "headersSent");
                        Object.defineProperty(res, "headersSent", getter);
                    }
                    _write = _end = _writeHead = null;
                };
                let head_checked = false;
                let is_html = null;
                let chunks = [];
                // Decides once, on the first write/end, whether this response should be buffered.
                let check_head = () => {
                    if (head_checked)
                        return;
                    const ct = this.readHeaderAsString(res, 'content-type');
                    is_html = ct.indexOf('text/html') > -1 || replaceUrls;
                    if (!is_html) {
                        debug('not html', ct);
                        restore_override();
                    }
                    if (res.statusCode < 200 || res.statusCode >= 300) {
                        debug('error response', res.statusCode);
                        is_html = false;
                        restore_override();
                    }
                    head_checked = true;
                };
                // Copies binary chunks (the caller may reuse its buffer) and keeps strings as they are.
                const buffer_chunk = (chunk) => {
                    chunks.push(chunk instanceof Uint8Array ? Buffer.from(chunk) : chunk);
                };
                let justAnObject = res;
                res.write = function (chunk, encoding, cb) {
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return;
                        debug('write original');
                        // check_head() restored the real write, so this is not recursive.
                        return res.write.apply(res, arguments);
                    }
                    // write(chunk, cb): normalise before the chunk is touched.
                    if (typeof (encoding) == 'function') {
                        cb = encoding;
                        encoding = void (0);
                    }
                    buffer_chunk(chunk);
                    if (cb)
                        cb();
                    return true;
                };
                const self = this;
                let alt_host = this.options.altHost;
                justAnObject.end = function (chunk, encoding, cb) {
                    if (typeof (chunk) == 'function') {
                        // end(cb)
                        cb = chunk;
                        chunk = null;
                        encoding = void (0);
                    }
                    else if (typeof (encoding) == 'function') {
                        // end(chunk, cb)
                        cb = encoding;
                        encoding = void (0);
                    }
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return;
                        debug('flush original');
                        restore_override();
                        return res.end.apply(res, arguments);
                    }
                    // Buffer the final chunk directly: routing it through the write
                    // override would fire `cb` now and again on the real write below.
                    if (chunk != null)
                        buffer_chunk(chunk);
                    let original_html = self.joinChunks(chunks);
                    res.setHeader('Content-Language', req.bablic.locale);
                    if (replaceUrls) {
                        restore_override();
                        // Sitemaps: bump every <lastmod> older than the locale's last-modified date.
                        if (lastModified && lastModified[req.bablic.locale] && /sitemap/i.test(req.url) &&
                            self.readHeaderAsString(res, 'content-type').indexOf('xml') > -1) {
                            const bablicDate = new Date(lastModified[req.bablic.locale]);
                            original_html = original_html.replace(new RegExp("<lastmod>(.*?)</lastmod>", "g"), (captureAll, dateCapture) => {
                                let siteMapDate = new Date(dateCapture);
                                if (siteMapDate < bablicDate) {
                                    return "<lastmod>" + bablicDate.toISOString() + "</lastmod>";
                                }
                                else {
                                    return captureAll;
                                }
                            });
                        }
                        const locale = req.bablic.locale;
                        const currentHost = req.headers.host;
                        let originalDomains = [currentHost];
                        if (alt_host)
                            originalDomains.push(alt_host);
                        if (meta.localeDetection === "custom" && meta.customUrls && meta.customUrls[locale]) {
                            if (currentHost === meta.customUrls[locale]) {
                                let supposeOriginDomain = meta.customUrls[meta.original];
                                if (supposeOriginDomain) {
                                    originalDomains.push(supposeOriginDomain);
                                }
                            }
                        }
                        html = original_html.replace(detect_url, url => {
                            if (ignore_not_html_or_xml.test(url))
                                return url;
                            // Only rewrite links that point at one of this site's own domains.
                            if (_.every(originalDomains, (domain) => !url.includes(domain))) {
                                return url;
                            }
                            let parsed = UrlParser.parse(url);
                            if (keywordsByLocale && keywordsByLocale[req.bablic.locale]) {
                                let keywords = keywordsByLocale[req.bablic.locale];
                                parsed.pathname = parsed.pathname.split('/').map(part => keywords[part] || part).join('/');
                            }
                            return common_1.getLink(req.bablic.locale, parsed, meta, self.subDirOptions);
                        });
                        if (res.getHeader('Transfer-Encoding') !== 'chunked') {
                            res.setHeader('Content-Length', Buffer.byteLength(html));
                        }
                        res.write(html, cb);
                        return res.end();
                    }
                    // Fallback used when rendering (or decoding the rendered body) fails.
                    const flush_original = (error) => {
                        if (cache_only)
                            return;
                        restore_override();
                        console.error('[Bablic SDK] Error:', my_url, error);
                        debug('flushing original');
                        res.write(original_html, cb);
                        res.end();
                    };
                    self.getHtml(my_url, req.bablic.locale, original_html).then((data) => {
                        if (cache_only)
                            return;
                        const isEncoded = self.isEncoded(data);
                        if (isEncoded) {
                            if (acceptGZIP) {
                                res.setHeader('Content-Encoding', 'gzip');
                            }
                            else {
                                try {
                                    data = zlib.gunzipSync(data);
                                }
                                catch (err) {
                                    // Without this the throw becomes an unhandled rejection
                                    // and the response is never finished.
                                    return flush_original(err);
                                }
                            }
                        }
                        restore_override();
                        debug('flushing translated');
                        if (res.getHeader('Transfer-Encoding') !== 'chunked') {
                            res.setHeader('Content-Length', Buffer.byteLength(data));
                        }
                        res.write(data, cb);
                        res.end();
                    }, flush_original);
                };
                return next();
            });
        };
    }
}
|
353 | exports.SeoMiddleware = SeoMiddleware;
|
// URLs whose path ends in one of these extensions are left untouched when
// rewriting links. The extension must be followed by the end of the URL, a
// query string or a fragment; otherwise hosts such as "www.docusign.com" or
// pages such as "page.jsp" would match ".doc" / ".js" and be skipped.
const ignore_not_html_or_xml = /\.(js|css|jpg|jpeg|png|ico|mp4|wmv|ogg|mp3|avi|mpeg|bmp|wav|pdf|doc|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;
// Matches absolute http(s)/ftp/file URLs embedded in text; the last character
// class excludes trailing punctuation.
const detect_url = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
// Render server endpoint; reassigned by setRenderServer().
let SEO_ROOT = 'http://seo.bablic.com/api/engine/seo';
|
/**
 * Points the middleware at a different render server.
 *
 * @param {string} url - endpoint that replaces the current SEO_ROOT.
 * @throws {Error} when `url` is falsy.
 */
function setRenderServer(url) {
    if (url) {
        SEO_ROOT = url;
        return;
    }
    throw new Error("Must be a valid URL");
}
|
363 | exports.setRenderServer = setRenderServer;
|
/**
 * Returns the hex-encoded MD5 digest of `data`.
 * Used here only to derive cache file names.
 *
 * @param {string|Buffer} data
 * @returns {string} 32-character lowercase hex string.
 */
function hash(data) {
    const md5 = crypto.createHash('md5');
    md5.update(data);
    return md5.digest('hex');
}
|
/**
 * Builds the cache file path for a page: `<cacheDir>/<locale>/<hash(url)>`.
 *
 * @param {string} url - page URL; only its hash appears in the path.
 * @param {string} locale
 * @param {string} cacheDir
 * @returns {string}
 */
function fullPathFromUrl(url, locale, cacheDir) {
    return `${cacheDir}/${locale}/${hash(url)}`;
}
|
/**
 * Returns the directory that holds the cache entries of one locale.
 *
 * @param {string} locale
 * @param {string} cacheDir - cache root.
 * @returns {string} `<cacheDir>/<locale>`
 */
function getCacheDir(locale, cacheDir) {
    return `${cacheDir}/${locale}`;
}
|
/**
 * Tells whether a cache file is still fresh.
 *
 * @param {object} file_stats - stat result; only `mtime` is read.
 * @param {number} cacheDays - lifetime of an entry, in days.
 * @returns {boolean} true while the current time is before `mtime + cacheDays`.
 */
function cacheValid(file_stats, cacheDays) {
    const expiresAt = moment(file_stats.mtime.getTime());
    const currentTime = moment();
    expiresAt.add(cacheDays, 'days');
    return currentTime.isBefore(expiresAt);
}
|
// Static-asset extensions that are never rendered. The extension has to end
// the path (optionally followed by a query string or fragment); without that
// anchor "/page.jsp" or "/v1.docs/intro" would be treated as assets.
const filename_tester = /\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|xml|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;
/**
 * Tells whether the request targets a static asset that should be skipped.
 *
 * @param {{url: string}} req
 * @returns {boolean}
 */
function ignorable(req) {
    return filename_tester.test(req.url);
}
|
// Substrings of a User-Agent header that mark the client as a crawler.
const google_tester = /bot|crawler|yandex|bing|baidu|spider|facebook|twitter|80legs|google|seo/i;
/**
 * Tells whether the request's User-Agent looks like a crawler.
 *
 * @param {{headers: object}} req
 * @returns {boolean}
 */
function isBot(req) {
    const userAgent = req.headers['user-agent'];
    return google_tester.test(userAgent);
}
|
/**
 * A request is rendered only when it comes from a crawler and does not
 * target a static asset.
 *
 * @param {object} req
 * @returns {boolean}
 */
function shouldHandle(req) {
    if (!isBot(req)) {
        return false;
    }
    return !ignorable(req);
}
|
/**
 * Tells whether the request is for a sitemap or robots file, i.e. a document
 * whose links are rewritten rather than rendered.
 *
 * @param {{url: string}} req
 * @returns {boolean}
 */
function shouldReplaceUrls(req) {
    const linkListing = /sitemap|robots/i;
    return linkListing.test(req.url);
}
|
/**
 * Probes the render server with a GET request.
 *
 * @returns {Promise<boolean>} resolves to false when the request fails at the
 *   transport level and to true otherwise; it never rejects.
 */
function renderHealthCheck() {
    return new Promise((resolve, reject) => {
        debug('render health check');
        const probe = {
            url: SEO_ROOT,
            headers: {
                "Accept-Encoding": "gzip,deflate"
            },
            method: 'GET',
            timeout: 10000,
        };
        request(probe, (error) => {
            // Only transport errors count; the HTTP status is not inspected.
            if (!error) {
                debug('render is healthy');
                resolve(true);
                return;
            }
            debug('render is not healthy', error);
            resolve(false);
        });
    });
}
|
// Last known state of the render server; assumed healthy until a probe fails.
let isRenderHealthy = true;
// Re-probe the render server once a minute.
const healthPollTimer = setInterval(() => {
    renderHealthCheck().then((health) => {
        isRenderHealthy = health;
    }).catch((err) => {
        debug('render health check failed', err);
    });
}, 1000 * 60);
// A background poll must not keep the host process alive on its own
// (otherwise merely requiring this module prevents a clean exit).
if (healthPollTimer && typeof healthPollTimer.unref === 'function') {
    healthPollTimer.unref();
}
|