// Retrieved from UNPKG (raw view; 18.4 kB; JavaScript).
1"use strict";
2Object.defineProperty(exports, "__esModule", { value: true });
3const crypto = require("crypto");
4const fs = require("fs");
5const fs_extra_1 = require("fs-extra");
6const moment = require("moment");
7const request = require("request");
8const Debug = require("debug");
9const UrlParser = require("url");
10const debug = Debug('bablic:seo');
11const zlib = require('zlib');
12const common_1 = require("./common");
13const http_1 = require("http");
14const _ = require("lodash");
/**
 * Middleware factory that serves Bablic-rendered (translated) HTML to
 * requests accepted by `shouldHandle`, and rewrites absolute URLs in
 * responses accepted by `shouldReplaceUrls` (sitemap / robots).
 *
 * Rendered pages are cached on disk under `options.cacheDir/<locale>/`.
 */
class SeoMiddleware {
    /**
     * @param {string} siteId - Site identifier sent to the render server.
     * @param {object} options - Read fields: `cacheDir`, `useCache`,
     *     `cacheDays`, `altHost`.
     * @param {object} [subDirOptions] - Read fields: `subDir`, `subDirBase`,
     *     `subDirOptional`. `returnFull` defaults to `true`.
     */
    constructor(siteId, options, subDirOptions) {
        this.siteId = siteId;
        this.options = options;
        this.subDirOptions = Object.assign({ returnFull: true }, subDirOptions);
    }

    /**
     * Stores a rendered page in the cache, creating the locale directory on
     * the first failed write and retrying once.
     *
     * @param {string} url - Page URL (hashed into the file name).
     * @param {string} locale - Target locale (sub-directory name).
     * @param {Buffer|string} translated - Rendered page body.
     * @returns {Promise<void>}
     */
    async writeToCache(url, locale, translated) {
        let cachePath = fullPathFromUrl(url, locale, this.options.cacheDir);
        try {
            await fs_extra_1.writeFile(cachePath, translated);
        }
        catch (e) {
            // Most likely the locale directory does not exist yet.
            const cacheDir = getCacheDir(locale, this.options.cacheDir);
            debug("create cache dir", cacheDir);
            await fs_extra_1.ensureDir(cacheDir);
            debug("created");
            await fs_extra_1.writeFile(cachePath, translated);
        }
    }

    /**
     * Posts the original HTML to the render server and resolves with the
     * raw response body (a Buffer, possibly gzip-compressed). A successful
     * body is also written to the cache in the background.
     *
     * @param {string} url - Absolute URL of the page being rendered.
     * @param {string} locale - Target locale.
     * @param {string} html - Original HTML produced by the application.
     * @returns {Promise<Buffer>} Rejects on transport error, non-2xx status,
     *     empty body, or when the last health check failed.
     */
    getHtml(url, locale, html) {
        if (!isRenderHealthy) {
            return Promise.reject(new Error("Render is not health"));
        }
        debug('getting from bablic', url, 'html:', !!html);
        let ld = '';
        if (this.subDirOptions.subDir) {
            ld = '&ld=subdir';
            if (this.subDirOptions.subDirBase)
                ld += '&sdb=' + encodeURIComponent(this.subDirOptions.subDirBase);
            if (this.subDirOptions.subDirOptional)
                ld += '&sdo=true';
        }
        return new Promise((resolve, reject) => {
            request({
                url: SEO_ROOT + "?site=" + this.siteId + "&el=" + locale + "&url=" + (encodeURIComponent(url)) + ld,
                headers: {
                    "Accept-Encoding": "gzip,deflate"
                },
                method: 'POST',
                json: {
                    html: html
                },
                timeout: 20000,
                // Keep the body as a raw Buffer so gzip bytes survive intact.
                encoding: null,
            }, (error, response, body) => {
                if (error)
                    return reject(error);
                if (response.statusCode < 200 || response.statusCode >= 300)
                    return reject(new Error("Status-" + response.statusCode));
                if (body == null)
                    return reject(new Error('empty response'));
                debug('received translated html', response.statusCode);
                resolve(body);
                // Best effort: a failed cache write must not fail the request.
                this.writeToCache(url, locale, body).catch((e) => {
                    debug("error writing to cache", e);
                });
            });
        });
    }

    /**
     * Looks a page up in the disk cache.
     *
     * @param {string} url - Page URL.
     * @param {string} locale - Target locale.
     * @param {boolean} skip - When truthy the cache is bypassed.
     * @param {Function} callback - Called with `(error, data, isValid)`.
     *     Called with no arguments when caching is disabled or skipped;
     *     `isValid` tells whether the entry is younger than
     *     `options.cacheDays` (default 1).
     */
    getFromCache(url, locale, skip, callback) {
        if (!this.options.useCache || skip)
            return callback();
        let file_path = fullPathFromUrl(url, locale, this.options.cacheDir);
        fs.stat(file_path, (error, file_stats) => {
            if (error)
                return callback(error);
            fs.readFile(file_path, (error, data) => {
                if (error)
                    return callback(error);
                callback(error, data, cacheValid(file_stats, this.options.cacheDays || 1));
            });
        });
    }

    /**
     * Tells whether a buffer starts with the gzip magic bytes 0x1f 0x8b.
     *
     * @param {Buffer} buffer
     * @returns {boolean} `false` for non-gzip data and for null/undefined.
     */
    isEncoded(buffer) {
        try {
            // every gzip content start with 0x1f8b 2 bytes
            let firstByte = buffer[0];
            let secondByte = buffer[1];
            return (firstByte === 0x1f) && (secondByte === 0x8b);
        }
        catch (err) {
            return false;
        }
    }

    /**
     * Reads a response header as a string.
     *
     * @param {object} res - Object exposing `getHeader(name)`.
     * @param {string} headerName
     * @returns {string} `""` when unset; the first element of an array
     *     value; any other value coerced to a string.
     */
    readHeaderAsString(res, headerName) {
        let value = res.getHeader(headerName);
        if (!value)
            return "";
        if (Array.isArray(value)) {
            value = value[0];
        }
        return typeof value === "string" ? value : value + "";
    }

    /**
     * Deletes the whole cache directory.
     *
     * Uses `remove` rather than `rmdir`: the cache holds per-locale
     * sub-directories and files, and `rmdir` rejects on a non-empty
     * directory. `writeToCache` recreates the directories on demand.
     *
     * @returns {Promise<void>}
     */
    async purgeCache() {
        debug("purge cache", this.options.cacheDir);
        await fs_extra_1.remove(this.options.cacheDir);
        debug("purge done");
    }

    /**
     * Builds the request handler.
     *
     * @returns {Function} Handler taking `(meta, lastModified,
     *     keywordsByLocale, reverseKeywordByLocale, req, res, next)`. It
     *     reads `req.bablic.locale` and sets `req.bablic.proxied`.
     */
    middleware() {
        return (meta, lastModified, keywordsByLocale, reverseKeywordByLocale, req, res, next) => {
            let replaceUrls = shouldReplaceUrls(req);
            if (!shouldHandle(req) && !replaceUrls) {
                debug('ignored', req.url);
                return next();
            }
            let acceptGZIP = (req.headers['accept-encoding'] || '').indexOf('gzip') > -1;
            // Hide the header from downstream handlers; their output is
            // buffered as text below.
            delete req.headers['accept-encoding'];
            req.bablic.proxied = true;
            let protocol = req.headers['x-forwarded-proto'] || 'http';
            let my_url = protocol + "://" + req.headers.host + req.originalUrl;
            if (this.options.altHost)
                my_url = "http://" + this.options.altHost + req.originalUrl;
            this.getFromCache(my_url, req.bablic.locale, replaceUrls, (e, html, isValid) => {
                // True when a stale cached copy has already been sent and the
                // rest of the pipeline runs only to refresh the cache.
                let cache_only = false;
                if (html) {
                    debug('flushing from cache');
                    res.setHeader('Content-Type', 'text/html; charset=utf-8');
                    res.setHeader('Content-Language', req.bablic.locale);
                    const encoded = this.isEncoded(html);
                    // if browser support gzip encoding
                    if (acceptGZIP) {
                        // adding gzip flag
                        if (encoded) {
                            res.setHeader('Content-Encoding', 'gzip');
                        }
                    }
                    else {
                        // if the content from cache is gzipped
                        if (encoded) {
                            html = zlib.gunzipSync(html);
                        }
                    }
                    res.write(html);
                    res.end();
                    if (isValid)
                        return;
                    cache_only = true;
                }
                if (!isRenderHealthy && !replaceUrls) {
                    debug('render not healthy, skipping');
                    // The response is already finished when a stale cached
                    // copy was served; running the app would write to it.
                    if (cache_only)
                        return;
                    return next();
                }
                debug('overriding response');
                let _end = res.end;
                let _write = res.write;
                let _writeHead = res.writeHead;
                // Capture status and headers without flushing them, accepting
                // both writeHead(status, headers) and
                // writeHead(status, reason, headers).
                res.writeHead = (status, reason, _headers) => {
                    if (typeof reason === 'string') {
                        res.statusMessage = reason;
                    }
                    else {
                        _headers = reason;
                    }
                    res.statusCode = status;
                    if (_headers && typeof _headers === 'object') {
                        for (let key in _headers)
                            res.setHeader(key, _headers[key]);
                    }
                    return res;
                };
                let headers = {};
                let _getHeader;
                if (cache_only) {
                    // The real response is finished; make it look writable so
                    // the application can run, and keep headers in a local map.
                    _getHeader = res.getHeader;
                    res.finished = false;
                    Object.defineProperty(res, "headersSent", {
                        get: () => {
                            return false;
                        },
                        configurable: true,
                        enumerable: true,
                    });
                    res.setHeader = (name, value) => headers[name.toLowerCase().trim()] = value;
                    res.removeHeader = name => headers[name.toLowerCase().trim()] = null;
                    res.getHeader = name => {
                        let local = headers[name.toLowerCase().trim()];
                        if (local)
                            return local;
                        if (local === null)
                            return;
                        return _getHeader.call(res, name);
                    };
                }
                let restore_override = () => {
                    // In cache-only mode the overrides stay in place for good:
                    // restoring the real write/end would direct later
                    // application writes at an already-ended response.
                    if (cache_only || !_write || !_end || !_writeHead)
                        return;
                    debug('undo override');
                    res.write = _write;
                    res.end = _end;
                    res.writeHead = _writeHead;
                    _write = _end = _writeHead = null;
                };
                let head_checked = false;
                let is_html = null;
                let chunks = [];
                // Decides once, on first write/end, whether to buffer.
                let check_head = () => {
                    if (head_checked)
                        return;
                    const ct = this.readHeaderAsString(res, 'content-type');
                    is_html = ct.indexOf('text/html') > -1 || replaceUrls;
                    if (!is_html) {
                        debug('not html', ct);
                        restore_override();
                    }
                    if (res.statusCode < 200 || res.statusCode >= 300) {
                        debug('error response', res.statusCode);
                        is_html = false;
                        restore_override();
                    }
                    head_checked = true;
                };
                res.write = function (chunk, encoding, cb) {
                    // Normalise write(chunk, cb) before `encoding` is used as
                    // an encoding name.
                    if (typeof (encoding) == 'function') {
                        cb = encoding;
                        encoding = void (0);
                    }
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return;
                        debug('write original');
                        // check_head() restored the real write just above.
                        return res.write.apply(res, arguments);
                    }
                    if (chunk instanceof Buffer)
                        chunk = chunk.toString(encoding);
                    chunks.push(chunk);
                    if (cb)
                        cb();
                    return true;
                };
                const self = this;
                let alt_host = this.options.altHost;
                res.end = function (chunk, encoding, cb) {
                    // Normalise end(cb) and end(chunk, cb).
                    if (typeof (chunk) == 'function') {
                        cb = chunk;
                        chunk = null;
                        encoding = void (0);
                    }
                    else if (typeof (encoding) == 'function') {
                        cb = encoding;
                        encoding = void (0);
                    }
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return;
                        debug('flush original');
                        restore_override();
                        return res.end.apply(res, arguments);
                    }
                    // Buffer the last chunk without the callback; the callback
                    // belongs to the final write below and must fire once.
                    if (chunk != null)
                        res.write(chunk, encoding);
                    let original_html = chunks.join('');
                    res.setHeader('Content-Language', req.bablic.locale);
                    if (replaceUrls) {
                        restore_override();
                        // For XML sitemaps, bump <lastmod> values older than the
                        // locale's last modification date.
                        if (lastModified && lastModified[req.bablic.locale] && /sitemap/i.test(req.url) &&
                            self.readHeaderAsString(res, 'content-type').indexOf('xml') > -1) {
                            const bablicDate = new Date(lastModified[req.bablic.locale]);
                            original_html = original_html.replace(new RegExp("<lastmod>(.*?)</lastmod>", "g"), (captureAll, dateCapture) => {
                                let siteMapDate = new Date(dateCapture);
                                if (siteMapDate < bablicDate) {
                                    return "<lastmod>" + bablicDate.toISOString() + "</lastmod>";
                                }
                                else {
                                    return captureAll;
                                }
                            });
                        }
                        const locale = req.bablic.locale;
                        const currentHost = req.headers.host;
                        // Only URLs pointing at one of these hosts are rewritten.
                        let originalDomains = [currentHost];
                        if (alt_host)
                            originalDomains.push(alt_host);
                        if (meta.localeDetection === "custom" && meta.customUrls && meta.customUrls[locale]) {
                            if (currentHost === meta.customUrls[locale]) {
                                let supposeOriginDomain = meta.customUrls[meta.original];
                                if (supposeOriginDomain) {
                                    originalDomains.push(supposeOriginDomain);
                                }
                            }
                        }
                        const rewritten = original_html.replace(detect_url, url => {
                            if (ignore_not_html_or_xml.test(url))
                                return url;
                            if (_.every(originalDomains, (domain) => !url.includes(domain))) {
                                return url;
                            }
                            let parsed = UrlParser.parse(url);
                            // translate URLs in sitemaps and such
                            if (keywordsByLocale && keywordsByLocale[req.bablic.locale]) {
                                let keywords = keywordsByLocale[req.bablic.locale];
                                parsed.pathname = parsed.pathname.split('/').map(part => keywords[part] || part).join('/');
                            }
                            return common_1.getLink(req.bablic.locale, parsed, meta, self.subDirOptions);
                        });
                        if (res.getHeader('Transfer-Encoding') !== 'chunked') {
                            res.setHeader('Content-Length', Buffer.byteLength(rewritten));
                        }
                        res.write(rewritten, cb);
                        return res.end();
                    }
                    self.getHtml(my_url, req.bablic.locale, original_html).then((data) => {
                        // Cache refresh only: getHtml already stored the page.
                        if (cache_only)
                            return;
                        const isEncoded = self.isEncoded(data);
                        // if browser doesnt support gzip encoding
                        if (!acceptGZIP) {
                            // if the content is gzipped
                            if (isEncoded) {
                                data = zlib.gunzipSync(data);
                            }
                        }
                        else if (isEncoded) {
                            res.setHeader('Content-Encoding', 'gzip');
                        }
                        restore_override();
                        debug('flushing translated');
                        if (res.getHeader('Transfer-Encoding') !== 'chunked') {
                            res.setHeader('Content-Length', Buffer.byteLength(data));
                        }
                        res.write(data, cb);
                        res.end();
                    }, (error) => {
                        if (cache_only)
                            return;
                        // Rendering failed: fall back to the untranslated page.
                        restore_override();
                        console.error('[Bablic SDK] Error:', my_url, error);
                        debug('flushing original');
                        res.write(original_html, cb);
                        res.end();
                    });
                };
                return next();
            });
        };
    }
}
353exports.SeoMiddleware = SeoMiddleware;
// Matches URLs whose path ends in a static-asset extension; such URLs are
// left untouched when links are rewritten. The extension must terminate the
// path (end of string, or just before `?` / `#`), so look-alikes such as
// `.jsp`, host names like `www.pdfhost.com`, and file names inside a query
// string do not match.
const ignore_not_html_or_xml = /^[^?#]*\.(js|css|jpg|jpeg|png|ico|mp4|wmv|ogg|mp3|avi|mpeg|bmp|wav|pdf|doc|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;
// Finds absolute http(s)/ftp/file URLs inside free text. Global flag: meant
// for String.prototype.replace / match, not repeated .test() calls.
const detect_url = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
// Render server endpoint; reassigned by setRenderServer().
let SEO_ROOT = 'http://seo.bablic.com/api/engine/seo';
/**
 * Points the SDK at a different render server.
 *
 * @param {string} url - Absolute http(s) URL of the render endpoint.
 * @throws {Error} "Must be a valid URL" when `url` is not a string or does
 *     not start with `http://` or `https://` followed by a host. The
 *     current endpoint is left unchanged in that case.
 */
function setRenderServer(url) {
    // The endpoint is later concatenated with a query string and handed to
    // the HTTP client, so only absolute http(s) strings can work.
    if (typeof url !== "string" || !/^https?:\/\/[^\s\/?#]+/i.test(url)) {
        throw new Error("Must be a valid URL");
    }
    SEO_ROOT = url;
}
363exports.setRenderServer = setRenderServer;
/**
 * Hex MD5 digest of `data`. Used only to derive cache file names, not for
 * anything security-sensitive.
 *
 * @param {string|Buffer} data
 * @returns {string} 32-character lowercase hex string.
 */
function hash(data) {
    return crypto.createHash('md5').update(data).digest('hex');
}
/**
 * Cache file path for a page: `<cacheDir>/<locale>/<md5(url)>`.
 * Built on getCacheDir() so the two can never disagree about the layout.
 *
 * @param {string} url
 * @param {string} locale
 * @param {string} cacheDir
 * @returns {string}
 */
function fullPathFromUrl(url, locale, cacheDir) {
    return getCacheDir(locale, cacheDir) + "/" + hash(url);
}
/**
 * Directory holding the cached pages of one locale: `<cacheDir>/<locale>`.
 *
 * @param {string} locale
 * @param {string} cacheDir
 * @returns {string}
 */
function getCacheDir(locale, cacheDir) {
    return cacheDir + "/" + locale;
}
/**
 * Tells whether a cache file is still fresh.
 *
 * Uses plain millisecond arithmetic so fractional `cacheDays` values
 * (e.g. 0.5 for twelve hours) are honoured exactly instead of being rounded
 * to a whole number of days.
 *
 * @param {{mtime: Date}} file_stats - Stats of the cache file.
 * @param {number} cacheDays - Maximum age in days.
 * @returns {boolean} `true` while now is strictly before mtime + cacheDays.
 */
function cacheValid(file_stats, cacheDays) {
    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    const expiresAt = file_stats.mtime.getTime() + Number(cacheDays) * MS_PER_DAY;
    return Date.now() < expiresAt;
}
// Static-asset extensions. Anchored to the end of the string, so it must be
// applied to a path with the query string and fragment already removed.
const filename_tester = /\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|xml|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)$/i;
/**
 * Tells whether a request targets a static asset that should never be
 * rendered.
 *
 * Only the path is inspected, and the extension must end it: `/about.jsp`
 * or `/page?next=/a.png` are not assets, `/app.js?v=3` is.
 *
 * @param {{url: string}} req
 * @returns {boolean}
 */
function ignorable(req) {
    const pathOnly = req.url.split(/[?#]/)[0];
    return filename_tester.test(pathOnly);
}
// Substrings that identify crawler User-Agent strings (case-insensitive).
const google_tester = /bot|crawler|yandex|bing|baidu|spider|facebook|twitter|80legs|google|seo/i;
/**
 * Tells whether the request's User-Agent looks like a crawler.
 *
 * @param {{headers: object}} req
 * @returns {boolean} `false` when the header is missing, rather than
 *     testing the coerced string "undefined".
 */
function isBot(req) {
    const userAgent = req.headers['user-agent'];
    return typeof userAgent === 'string' && google_tester.test(userAgent);
}
/**
 * Tells whether a request should be rendered through the render server:
 * it must come from a crawler (isBot) and must not target a static asset
 * (ignorable).
 *
 * @param {object} req
 * @returns {boolean}
 */
function shouldHandle(req) {
    if (!isBot(req)) {
        return false;
    }
    return !ignorable(req);
}
/**
 * Tells whether the response to this request should have its absolute URLs
 * rewritten: any request whose URL mentions "sitemap" or "robots"
 * (case-insensitive). The whole URL, query string included, is tested.
 *
 * @param {{url: string}} req
 * @returns {boolean}
 */
function shouldReplaceUrls(req) {
    const urlRewriteTargets = /sitemap|robots/i;
    return urlRewriteTargets.test(req.url);
}
/**
 * Probes the render server with a GET request (10 s timeout).
 *
 * Only transport-level failures count as unhealthy; the HTTP status of the
 * reply is not inspected.
 *
 * @returns {Promise<boolean>} Resolves `true` when the request completed,
 *     `false` when it errored. Never rejects.
 */
function renderHealthCheck() {
    return new Promise((resolve) => {
        debug('render health check');
        const probe = {
            url: SEO_ROOT,
            headers: {
                "Accept-Encoding": "gzip,deflate"
            },
            method: 'GET',
            timeout: 10000,
        };
        request(probe, (error) => {
            if (error) {
                debug('render is not healthy', error);
                resolve(false);
                return;
            }
            debug('render is healthy');
            resolve(true);
        });
    });
}
// Result of the most recent render-server probe; optimistic until the first
// probe completes.
let isRenderHealthy = true;
// Re-probe the render server every minute.
const renderHealthTimer = setInterval(() => {
    renderHealthCheck().then((health) => {
        isRenderHealthy = health;
    });
}, 1000 * 60);
// Background polling must not, by itself, keep the host process alive
// (scripts and test runners that merely require this module could otherwise
// never exit).
renderHealthTimer.unref();