1 | "use strict";
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | const crypto = require("crypto");
|
4 | const fs = require("fs");
|
5 | const moment = require("moment");
|
6 | const OS = require("os");
|
7 | const request = require("request");
|
8 | const Debug = require("debug");
|
9 | const UrlParser = require("url");
|
10 | const debug = Debug('bablic:seo');
|
11 | const common_1 = require("./common");
|
/**
 * Intercepts the response of a downstream handler, sends the produced HTML to
 * the render server configured in SEO_ROOT and replies with the translated
 * markup instead. Rendered pages are also stored on disk (see getFromCache)
 * and, for sitemap/robots style requests, absolute URLs inside the body are
 * rewritten to their localized form without contacting the render server.
 */
class SeoMiddleware {
    /**
     * @param {string} siteId         Value sent as the `site` query parameter.
     * @param {object} options        Reads `useCache` and `altHost`.
     * @param {object} subDirOptions  Reads `subDir`, `subDirBase` and `subDirOptional`.
     */
    constructor(siteId, options, subDirOptions) {
        this.siteId = siteId;
        this.options = options;
        this.subDirOptions = subDirOptions;
    }

    /**
     * POSTs the original HTML to the render server and resolves with the
     * body it returns. On success the body is also written to the cache file
     * derived from `url` (best effort, failures are only logged).
     *
     * @param {string} url     Absolute URL of the page being rendered.
     * @param {string} locale  Target locale, sent as the `el` query parameter.
     * @param {string} html    Original markup produced by the application.
     * @returns {Promise<string>} Rejects on transport errors, non-2xx
     *   statuses, and empty or non-string bodies.
     */
    getHtml(url, locale, html) {
        debug('getting from bablic', url, 'html:', !!html);
        let ld = '';
        if (this.subDirOptions.subDir) {
            ld = '&ld=subdir';
            if (this.subDirOptions.subDirBase)
                ld += '&sdb=' + encodeURIComponent(this.subDirOptions.subDirBase);
            if (this.subDirOptions.subDirOptional)
                ld += '&sdo=true';
        }
        return new Promise((resolve, reject) => {
            request({
                // The locale is encoded as well so that an unexpected value
                // cannot inject extra query parameters.
                url: SEO_ROOT + "?site=" + this.siteId + "&el=" + encodeURIComponent(locale) + "&url=" + (encodeURIComponent(url)) + ld,
                method: 'POST',
                json: {
                    html: html
                }
            }, (error, response, body) => {
                if (error)
                    return reject(error);
                if (response.statusCode < 200 || response.statusCode >= 300)
                    return reject(new Error("Status-" + response.statusCode));
                if (body == null)
                    return reject(new Error('empty response'));
                // With the `json` option a JSON reply arrives already parsed.
                // Anything that is not a string can neither be written to the
                // response nor to the cache file, so treat it as a failure and
                // let the caller fall back to the original markup.
                if (typeof body !== 'string')
                    return reject(new Error('unexpected response type: ' + typeof body));
                debug('received translated html', response.statusCode);
                resolve(body);
                fs.writeFile(fullPathFromUrl(url), body, error => error && console.error('Error saving to cache', error));
            });
        });
    }

    /**
     * Looks up the cached rendering of `url`.
     *
     * @param {string} url        Absolute URL used as the cache key.
     * @param {boolean} skip      When truthy the cache is bypassed.
     * @param {Function} callback Called with no arguments when the cache is
     *   disabled or skipped, with `(error)` when the file cannot be read, or
     *   with `(null, html, isValid)` where `isValid` is the result of
     *   cacheValid() for the file.
     */
    getFromCache(url, skip, callback) {
        if (!this.options.useCache || skip)
            return callback();
        const file_path = fullPathFromUrl(url);
        fs.stat(file_path, (statError, file_stats) => {
            if (statError)
                return callback(statError);
            fs.readFile(file_path, (readError, data) => {
                if (readError)
                    return callback(readError);
                callback(readError, data.toString('utf8'), cacheValid(file_stats));
            });
        });
    }

    /**
     * Builds the request handler.
     *
     * @returns {Function} Handler with the signature
     *   `(meta, keywordsByLocale, reverseKeywordByLocale, req, res, next)`.
     *   It expects `req.bablic.locale` to be set by an earlier stage.
     */
    middleware() {
        return (meta, keywordsByLocale, reverseKeywordByLocale, req, res, next) => {
            const replaceUrls = shouldReplaceUrls(req);
            if (!shouldHandle(req) && !replaceUrls) {
                debug('ignored', req.url);
                return next();
            }
            // The body is buffered and inspected as text, so ask downstream
            // handlers not to compress it.
            delete req.headers['accept-encoding'];
            req.bablic.proxied = true;
            const protocol = req.headers['x-forwarded-proto'] || 'http';
            let my_url = protocol + "://" + req.headers.host + req.originalUrl;
            if (this.options.altHost)
                my_url = "http://" + this.options.altHost + req.originalUrl;
            const self = this;
            const alt_host = this.options.altHost;
            this.getFromCache(my_url, replaceUrls, (e, html, isValid) => {
                // cache_only: the client was already answered from a stale
                // cache entry; the downstream handler still runs, but only to
                // refresh that entry.
                let cache_only = false;
                if (html) {
                    debug('flushing from cache');
                    res.setHeader('Content-Type', 'text/html; charset=utf-8');
                    res.setHeader('Content-Language', req.bablic.locale);
                    res.write(html);
                    res.end();
                    if (isValid)
                        return;
                    cache_only = true;
                }
                debug('overriding response');
                let _end = res.end;
                let _write = res.write;
                let _writeHead = res.writeHead;
                // Accepts both writeHead(status, headers) and
                // writeHead(status, statusMessage, headers) and returns `res`
                // so chained calls keep working.
                res.writeHead = (status, statusMessage, _headers) => {
                    if (typeof statusMessage !== 'string') {
                        _headers = statusMessage;
                        statusMessage = undefined;
                    }
                    res.statusCode = status;
                    if (statusMessage !== undefined)
                        res.statusMessage = statusMessage;
                    if (_headers && typeof _headers === 'object') {
                        for (const key in _headers)
                            res.setHeader(key, _headers[key]);
                    }
                    return res;
                };
                const headers = {};
                let _getHeader;
                if (cache_only) {
                    // The real headers were already sent, so header changes
                    // made downstream are kept in a local shadow map.
                    _getHeader = res.getHeader;
                    res.setHeader = (name, value) => headers[name.toLowerCase().trim()] = value;
                    res.removeHeader = name => headers[name.toLowerCase().trim()] = null;
                    res.getHeader = name => {
                        const local = headers[name.toLowerCase().trim()];
                        if (local)
                            return local;
                        if (local === null)
                            return;
                        return _getHeader.call(res, name);
                    };
                }
                const restore_override = () => {
                    // In cache-only mode the response is already finished:
                    // handing the real write/end/writeHead back to downstream
                    // code would make it write to a closed response.
                    if (cache_only)
                        return;
                    if (!_write || !_end || !_writeHead)
                        return;
                    debug('undo override');
                    res.write = _write;
                    res.end = _end;
                    res.writeHead = _writeHead;
                    _write = _end = _writeHead = null;
                };
                let head_checked = false;
                let is_html = null;
                const chunks = [];
                // Decides once, on the first write/end, whether the response
                // body should be captured.
                const check_head = () => {
                    if (head_checked)
                        return;
                    is_html = false;
                    const contentType = res.getHeader('content-type');
                    if (typeof contentType !== 'undefined')
                        is_html = (String(contentType).indexOf('text/html') > -1) || replaceUrls;
                    if (!is_html) {
                        debug('not html', contentType);
                        restore_override();
                    }
                    if (res.statusCode < 200 || res.statusCode >= 300) {
                        debug('error response', res.statusCode);
                        is_html = false;
                        restore_override();
                    }
                    head_checked = true;
                };
                res.write = function (chunk, encoding, cb) {
                    // write(chunk, cb): normalise before anything looks at `encoding`.
                    if (typeof encoding === 'function')
                        cb = encoding;
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return true;
                        debug('write original');
                        // check_head() has restored the real write by now.
                        return res.write.apply(res, arguments);
                    }
                    // Binary chunks are kept as bytes and decoded together at
                    // the end, so a character split across chunks survives.
                    if (chunk instanceof Uint8Array && !Buffer.isBuffer(chunk))
                        chunk = Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
                    chunks.push(chunk);
                    if (cb)
                        cb();
                    // Report "no backpressure" like a regular writable stream.
                    return true;
                };
                res.end = function (chunk, encoding, cb) {
                    if (typeof chunk === 'function') {
                        // end(cb)
                        cb = chunk;
                        chunk = void (0);
                        encoding = void (0);
                    }
                    else if (typeof encoding === 'function') {
                        // end(chunk, cb)
                        cb = encoding;
                        encoding = void (0);
                    }
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return res;
                        debug('flush original');
                        restore_override();
                        return res.end.apply(res, arguments);
                    }
                    // Buffer the last chunk without the callback: the callback
                    // belongs to the final flush below and must fire only once.
                    if (chunk != null)
                        res.write(chunk, encoding);
                    const original_html = joinBufferedResponseChunks(chunks);
                    res.setHeader('Content-Language', req.bablic.locale);
                    if (replaceUrls) {
                        restore_override();
                        const keywords = keywordsByLocale && keywordsByLocale[req.bablic.locale];
                        const rewritten = original_html.replace(detect_url, url => {
                            if (ignore_not_html_or_xml.test(url))
                                return url;
                            // Only links pointing at this site are localized.
                            if (url.indexOf(req.headers.host) === -1 && (!alt_host || url.indexOf(alt_host) === -1))
                                return url;
                            const parsed = UrlParser.parse(url);
                            if (keywords && parsed.pathname) {
                                // Own-property lookup only: a path segment such as
                                // "constructor" must not resolve to Object.prototype.
                                parsed.pathname = parsed.pathname.split('/').map(part => (Object.prototype.hasOwnProperty.call(keywords, part) && keywords[part]) || part).join('/');
                            }
                            return common_1.getLink(req.bablic.locale, parsed, meta);
                        });
                        res.setHeader('Content-Length', Buffer.byteLength(rewritten));
                        res.write(rewritten, cb);
                        res.end();
                        return res;
                    }
                    self.getHtml(my_url, req.bablic.locale, original_html).then((data) => {
                        // In cache-only mode getHtml() has refreshed the cache
                        // file and there is nobody left to answer.
                        if (cache_only)
                            return;
                        restore_override();
                        debug('flushing translated');
                        res.setHeader('Content-Length', Buffer.byteLength(data));
                        res.write(data, cb);
                        res.end();
                    }, (error) => {
                        if (cache_only) {
                            debug('cache refresh failed', error);
                            return;
                        }
                        restore_override();
                        console.error('[Bablic SDK] Error:', error);
                        debug('flushing original');
                        res.write(original_html, cb);
                        res.end();
                    }).catch((error) => {
                        // A failure while flushing must not surface as an
                        // unhandled rejection.
                        console.error('[Bablic SDK] Error:', error);
                    });
                    return res;
                };
                return next();
            });
        };
    }
}

/**
 * Concatenates buffered response chunks into one string. Runs of consecutive
 * binary chunks are joined before being decoded as UTF-8; string chunks are
 * appended as they are and null/undefined entries are skipped.
 *
 * @param {Array<string|Buffer>} chunks
 * @returns {string}
 */
function joinBufferedResponseChunks(chunks) {
    let text = '';
    let pending = [];
    const flushPending = () => {
        if (pending.length === 0)
            return;
        text += Buffer.concat(pending).toString('utf8');
        pending = [];
    };
    for (const chunk of chunks) {
        if (Buffer.isBuffer(chunk)) {
            pending.push(chunk);
            continue;
        }
        flushPending();
        if (chunk != null)
            text += chunk;
    }
    flushPending();
    return text;
}
|
225 | exports.SeoMiddleware = SeoMiddleware;
|
// Matches URLs whose path ends in a static-asset extension (optionally
// followed by a query string or fragment). The pattern is anchored to the end
// of the path so that the extension list cannot match inside a host name
// ("www.docusign.com", "cdn.jsdelivr.net") or inside a query value.
// "xml" is deliberately absent: XML documents still get their links rewritten.
const ignore_not_html_or_xml = /^[^?#]*\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;
// Finds absolute http(s)/ftp/file URLs inside free text; trailing punctuation
// such as "." or "," is not part of the match.
const detect_url = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
|
// Endpoint that receives the original HTML; replaced through setRenderServer().
let SEO_ROOT = 'http://seo.bablic.com/api/engine/seo';

/**
 * Points the middleware at a different render server.
 *
 * @param {string} url  New endpoint; any falsy value is rejected.
 * @throws {Error} When `url` is falsy.
 */
function setRenderServer(url) {
    if (url) {
        SEO_ROOT = url;
        return;
    }
    throw new Error("Must be a valid URL");
}
|
235 | exports.setRenderServer = setRenderServer;
|
/**
 * Returns the hexadecimal MD5 digest of `data`.
 *
 * @param {string|Buffer} data
 * @returns {string} 32 lowercase hex characters.
 */
function hash(data) {
    const md5 = crypto.createHash('md5');
    md5.update(data);
    return md5.digest('hex');
}
|
/**
 * Maps a URL to the location of its cache file: the operating system's
 * temporary directory joined by "/" with the hash of the URL.
 *
 * @param {string} url
 * @returns {string}
 */
function fullPathFromUrl(url) {
    const directory = OS.tmpdir();
    const fileName = hash(url);
    return [directory, fileName].join("/");
}
|
/**
 * Tells whether a cache file is still fresh, i.e. whether it was modified
 * less than `maxAgeMinutes` minutes ago.
 *
 * @param {{mtime: Date}} file_stats  Stats of the cache file; only `mtime` is read.
 * @param {number} [maxAgeMinutes=30] Lifetime of a cache entry in minutes.
 * @returns {boolean} True while the current time is strictly before
 *   `mtime + maxAgeMinutes`.
 */
function cacheValid(file_stats, maxAgeMinutes = 30) {
    const expiresAt = file_stats.mtime.getTime() + maxAgeMinutes * 60 * 1000;
    return Date.now() < expiresAt;
}
|
// Matches request URLs whose path ends in a static-asset extension, with an
// optional query string or fragment after it. Anchoring to the end of the
// path keeps page URLs such as "/blog/node.js-tips" or "/search?q=a.pdf"
// from being mistaken for assets.
const filename_tester = /^[^?#]*\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|xml|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=[?#]|$)/i;

/**
 * Tells whether the request targets a static asset.
 *
 * @param {{url: string}} req
 * @returns {boolean}
 */
function ignorable(req) {
    return filename_tester.test(req.url);
}
|
// Substrings looked for (case-insensitively) in the User-Agent header.
const google_tester = /bot|crawler|baiduspider|facebook|twitter|80legs|google|seo/i;

/**
 * Tells whether the request's User-Agent header matches `google_tester`.
 *
 * @param {{headers: object}} req
 * @returns {boolean}
 */
function isBot(req) {
    const userAgent = req.headers['user-agent'];
    const matchesCrawlerPattern = google_tester.test(userAgent);
    return matchesCrawlerPattern;
}
|
/**
 * Tells whether the request should go through the render server: isBot()
 * must accept it and ignorable() must not. ignorable() is only consulted
 * when isBot() returned a truthy value.
 *
 * @param {object} req
 * @returns {boolean}
 */
function shouldHandle(req) {
    const fromBot = isBot(req);
    if (!fromBot) {
        return fromBot;
    }
    return !ignorable(req);
}
|
/**
 * Tells whether the request URL contains "sitemap" or "robots"
 * (case-insensitive).
 *
 * @param {{url: string}} req
 * @returns {boolean}
 */
function shouldReplaceUrls(req) {
    const sitemapOrRobots = /sitemap|robots/i;
    return sitemapOrRobots.test(req.url);
}
|