UNPKG

11.6 kBJavaScriptView Raw
1"use strict";
2Object.defineProperty(exports, "__esModule", { value: true });
3const crypto = require("crypto");
4const fs = require("fs");
5const moment = require("moment");
6const OS = require("os");
7const request = require("request");
8const Debug = require("debug");
9const UrlParser = require("url");
10const debug = Debug('bablic:seo');
11const common_1 = require("./common");
/**
 * Connect/Express-style middleware that intercepts HTML responses for bot
 * requests (and sitemap/robots requests), sends the HTML to the render server
 * configured in SEO_ROOT, and answers with the translated result. Translated
 * pages are cached as files in the OS temp directory.
 */
class SeoMiddleware {
    /**
     * @param siteId        Sent as the `site` query parameter to the render server.
     * @param options       Reads `useCache` (enable cache lookups) and `altHost`
     *                      (host used instead of the request's Host header).
     * @param subDirOptions Reads `subDir`, `subDirBase` and `subDirOptional`.
     */
    constructor(siteId, options, subDirOptions) {
        this.siteId = siteId;
        this.options = options;
        this.subDirOptions = subDirOptions;
    }

    /**
     * POSTs the original HTML to the render server and resolves with its response body.
     *
     * @param url    Absolute URL of the page being rendered (also the cache key).
     * @param locale Sent as the `el` query parameter.
     * @param html   Original HTML, sent as the `html` field of the JSON body.
     * @returns {Promise} Resolves with the response body; rejects on transport
     *          error, a non-2xx status, or a null/undefined body.
     */
    getHtml(url, locale, html) {
        debug('getting from bablic', url, 'html:', !!html);
        let ld = '';
        if (this.subDirOptions.subDir) {
            ld = '&ld=subdir';
            if (this.subDirOptions.subDirBase)
                ld += '&sdb=' + encodeURIComponent(this.subDirOptions.subDirBase);
            if (this.subDirOptions.subDirOptional)
                ld += '&sdo=true';
        }
        return new Promise((resolve, reject) => {
            request({
                url: SEO_ROOT + "?site=" + this.siteId + "&el=" + locale + "&url=" + (encodeURIComponent(url)) + ld,
                method: 'POST',
                json: {
                    html: html
                }
            }, (error, response, body) => {
                if (error)
                    return reject(error);
                if (response.statusCode < 200 || response.statusCode >= 300)
                    return reject(new Error("Status-" + response.statusCode));
                if (body == null)
                    return reject(new Error('empty response'));
                debug('received translated html', response.statusCode);
                resolve(body);
                // Best-effort cache write. fs.writeFile throws synchronously on data it
                // cannot serialise, so only hand it strings/Buffers.
                if (typeof body === 'string' || Buffer.isBuffer(body)) {
                    fs.writeFile(fullPathFromUrl(url), body, error => error && console.error('Error saving to cache', error));
                }
            });
        });
    }

    /**
     * Looks up the cached translation for `url`.
     *
     * @param url      Cache key.
     * @param skip     When truthy the cache is bypassed.
     * @param callback Called with no arguments when the cache is disabled or
     *                 skipped, with `(error)` when the file cannot be read, or
     *                 with `(null, html, isValid)` where `isValid` is the result
     *                 of `cacheValid` on the file's stats.
     */
    getFromCache(url, skip, callback) {
        if (!this.options.useCache || skip)
            return callback();
        const file_path = fullPathFromUrl(url);
        fs.stat(file_path, (error, file_stats) => {
            if (error)
                return callback(error);
            fs.readFile(file_path, (error, data) => {
                if (error)
                    return callback(error);
                callback(error, data.toString('utf8'), cacheValid(file_stats));
            });
        });
    }

    /**
     * Builds the request handler.
     *
     * @returns A function `(meta, keywordsByLocale, reverseKeywordByLocale, req, res, next)`.
     *          It expects `req.bablic` to exist and reads `req.bablic.locale`.
     */
    middleware() {
        return (meta, keywordsByLocale, reverseKeywordByLocale, req, res, next) => {
            const replaceUrls = shouldReplaceUrls(req);
            if (!shouldHandle(req) && !replaceUrls) {
                debug('ignored', req.url);
                return next();
            }
            // Drop accept-encoding so downstream handlers are not asked for a
            // compressed body; the body is buffered and processed as text below.
            delete req.headers['accept-encoding'];
            req.bablic.proxied = true;
            const protocol = req.headers['x-forwarded-proto'] || 'http';
            const alt_host = this.options.altHost;
            let my_url = protocol + "://" + req.headers.host + req.originalUrl;
            if (alt_host)
                my_url = "http://" + alt_host + req.originalUrl;
            this.getFromCache(my_url, replaceUrls, (e, html, isValid) => {
                // cache_only: a stale cached copy has already been sent; the rest of the
                // pipeline only runs to refresh the cache and must not touch the socket.
                let cache_only = false;
                if (html) {
                    debug('flushing from cache');
                    res.setHeader('Content-Type', 'text/html; charset=utf-8');
                    res.setHeader('Content-Language', req.bablic.locale);
                    res.write(html);
                    res.end();
                    if (isValid)
                        return;
                    cache_only = true;
                }
                debug('overriding response');
                let _end = res.end;
                let _write = res.write;
                let _writeHead = res.writeHead;
                // Accepts both writeHead(status, headers) and
                // writeHead(status, statusMessage, headers); returns res for chaining.
                res.writeHead = (status, reason, _headers) => {
                    if (typeof reason !== 'string') {
                        _headers = reason;
                        reason = undefined;
                    }
                    res.statusCode = status;
                    if (reason)
                        res.statusMessage = reason;
                    if (_headers && typeof _headers === 'object') {
                        for (const key in _headers)
                            res.setHeader(key, _headers[key]);
                    }
                    return res;
                };
                const headers = {};
                if (cache_only) {
                    // Headers are already sent: keep header changes in a local map.
                    const _getHeader = res.getHeader;
                    res.setHeader = (name, value) => headers[name.toLowerCase().trim()] = value;
                    res.removeHeader = name => headers[name.toLowerCase().trim()] = null;
                    res.getHeader = name => {
                        const local = headers[name.toLowerCase().trim()];
                        if (local)
                            return local;
                        if (local === null)
                            return;
                        return _getHeader.call(res, name);
                    };
                }
                const restore_override = () => {
                    // In cache_only mode the real response is finished; putting the
                    // original write/end back would cause write-after-end errors, so
                    // the overrides stay in place and keep swallowing output.
                    if (cache_only)
                        return;
                    if (!_write || !_end || !_writeHead)
                        return;
                    debug('undo override');
                    res.write = _write;
                    res.end = _end;
                    res.writeHead = _writeHead;
                    _write = _end = _writeHead = null;
                };
                let head_checked = false;
                let is_html = null;
                const chunks = [];
                // Decides once, on first write/end, whether this response is buffered.
                const check_head = () => {
                    if (head_checked)
                        return;
                    is_html = false;
                    const content_type = res.getHeader('content-type');
                    if (content_type != null)
                        is_html = (String(content_type).indexOf('text/html') > -1) || replaceUrls;
                    if (!is_html) {
                        debug('not html', content_type);
                        restore_override();
                    }
                    if (res.statusCode < 200 || res.statusCode >= 300) {
                        debug('error response', res.statusCode);
                        is_html = false;
                        restore_override();
                    }
                    head_checked = true;
                };
                // Joins buffered chunks into one string. Runs of binary chunks are
                // concatenated before decoding so a multi-byte UTF-8 character split
                // across two writes is decoded correctly.
                const collect_html = () => {
                    let text = '';
                    let pending = [];
                    const flush = () => {
                        if (pending.length) {
                            text += Buffer.concat(pending).toString('utf8');
                            pending = [];
                        }
                    };
                    for (const chunk of chunks) {
                        if (chunk instanceof Uint8Array) {
                            pending.push(chunk);
                        }
                        else {
                            flush();
                            text += chunk;
                        }
                    }
                    flush();
                    return text;
                };
                res.write = (...args) => {
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return;
                        debug('write original');
                        // check_head() has restored the original write by now.
                        return res.write.apply(res, args);
                    }
                    const chunk = args[0];
                    // write(chunk, cb) and write(chunk, encoding, cb) are both valid.
                    const cb = typeof args[1] === 'function' ? args[1] : args[2];
                    if (chunk != null)
                        chunks.push(chunk);
                    if (typeof cb === 'function')
                        cb();
                    return true;
                };
                res.end = (...args) => {
                    let chunk = args[0];
                    let cb = args[2];
                    // end(cb), end(chunk, cb) and end(chunk, encoding, cb) are all valid.
                    if (typeof chunk === 'function') {
                        cb = chunk;
                        chunk = undefined;
                    }
                    else if (typeof args[1] === 'function') {
                        cb = args[1];
                    }
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return;
                        debug('flush original');
                        restore_override();
                        return res.end.apply(res, args);
                    }
                    // Buffer the final chunk directly; the callback is passed once, on
                    // the real flush below.
                    if (chunk != null)
                        chunks.push(chunk);
                    const original_html = collect_html();
                    res.setHeader('Content-Language', req.bablic.locale);
                    if (replaceUrls) {
                        restore_override();
                        html = original_html.replace(detect_url, url => {
                            if (ignore_not_html_or_xml.test(url))
                                return url;
                            // Only rewrite links that point at this site.
                            if (url.indexOf(req.headers.host) === -1 && (!alt_host || url.indexOf(alt_host) === -1))
                                return url;
                            const parsed = UrlParser.parse(url);
                            // translate URLs in sitemaps and such
                            if (keywordsByLocale && keywordsByLocale[req.bablic.locale]) {
                                const keywords = keywordsByLocale[req.bablic.locale];
                                parsed.pathname = parsed.pathname.split('/').map(part => keywords[part] || part).join('/');
                            }
                            return common_1.getLink(req.bablic.locale, parsed, meta);
                        });
                        res.setHeader('Content-Length', Buffer.byteLength(html));
                        res.write(html, cb);
                        return res.end();
                    }
                    this.getHtml(my_url, req.bablic.locale, original_html).then((data) => {
                        // getHtml has refreshed the cache file; nothing left to send.
                        if (cache_only)
                            return;
                        restore_override();
                        debug('flushing translated');
                        res.setHeader('Content-Length', Buffer.byteLength(data));
                        res.write(data, cb);
                        res.end();
                    }, (error) => {
                        if (cache_only)
                            return;
                        restore_override();
                        console.error('[Bablic SDK] Error:', error);
                        debug('flushing original');
                        res.write(original_html, cb);
                        res.end();
                    });
                };
                return next();
            });
        };
    }
}
225exports.SeoMiddleware = SeoMiddleware;
// URLs whose path ends in one of these extensions are left untouched when
// rewriting links. The extension must be followed by `?`, `#` or the end of the
// URL so that hosts/paths merely containing e.g. ".js" are not treated as assets.
const ignore_not_html_or_xml = /\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;
// Matches absolute http(s)/ftp/file URLs embedded in text.
const detect_url = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
// Render-server endpoint; reassigned by setRenderServer().
let SEO_ROOT = 'http://seo.bablic.com/api/engine/seo';
/**
 * Points the middleware at a different render server.
 *
 * @param url Endpoint that replaces the current SEO_ROOT value.
 * @throws {Error} When `url` is falsy.
 */
function setRenderServer(url) {
    const isMissing = !url;
    if (isMissing)
        throw new Error("Must be a valid URL");
    SEO_ROOT = url;
}
235exports.setRenderServer = setRenderServer;
/**
 * Returns the hex-encoded MD5 digest of `data`.
 * Used to derive cache file names, not for security.
 */
function hash(data) {
    const md5 = crypto.createHash('md5');
    md5.update(data);
    return md5.digest('hex');
}
/**
 * Maps a URL to its cache file: the OS temp directory joined by "/" with the
 * hash of the URL.
 */
function fullPathFromUrl(url) {
    const cacheDir = OS.tmpdir();
    const fileName = hash(url);
    return `${cacheDir}/${fileName}`;
}
/**
 * Tells whether a cache file is still fresh.
 *
 * @param file_stats        Object with an `mtime` Date (e.g. an fs.Stats).
 * @param max_age_minutes   Lifetime of a cache entry in minutes; defaults to 30.
 * @returns {boolean} True while the current time is strictly before
 *          `mtime + max_age_minutes`.
 */
function cacheValid(file_stats, max_age_minutes = 30) {
    const expires_at = file_stats.mtime.getTime() + max_age_minutes * 60 * 1000;
    return Date.now() < expires_at;
}
// Static-asset extensions. The extension must end the path (followed by `?`,
// `#` or end of string) so that page URLs such as "/blog/node.js-tips" are not
// mistaken for asset requests.
const filename_tester = /\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|xml|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;
/**
 * Returns true when `req.url` points at a static asset that should not be
 * handled by the SEO middleware.
 */
function ignorable(req) {
    return filename_tester.test(req.url);
}
// User-agent fragments that identify crawlers and link-preview fetchers.
const google_tester = /bot|crawler|baiduspider|facebook|twitter|80legs|google|seo/i;
/**
 * Returns true when the request's User-Agent header matches a known crawler
 * pattern.
 */
function isBot(req) {
    const userAgent = req.headers['user-agent'];
    return google_tester.test(userAgent);
}
/**
 * A request is handled when it comes from a bot and does not target an
 * ignorable (static asset) URL.
 */
function shouldHandle(req) {
    if (!isBot(req))
        return false;
    return !ignorable(req);
}
/**
 * Returns true when the request URL mentions "sitemap" or "robots"
 * (case-insensitive); such responses get their links rewritten.
 */
function shouldReplaceUrls(req) {
    const sitemapOrRobots = /sitemap|robots/i;
    return sitemapOrRobots.test(req.url);
}