UNPKG

12.3 kBPlain TextView Raw
1
2
3import * as async from 'async';
4import * as crypto from 'crypto';
5import * as fs from 'fs';
6import * as moment from 'moment';
7import * as OS from 'os';
8import * as request from 'request';
9import * as Debug from 'debug';
10import * as UrlParser from 'url';
11
12const debug = Debug('bablic:seo');
13
14import {ExtendedRequest, ExtendedResponse, Middleware, getLink, KeywordMapper, SiteMeta} from "./common";
15import {ServerResponse} from "http";
16import {Stats} from "fs";
17import {RequestResponse} from "request";
18
/**
 * Optional switches accepted by `SeoMiddleware`.
 */
export interface SeoOptions {
    /** When falsy, `SeoMiddleware.getFromCache` skips the on-disk lookup entirely. */
    useCache?: boolean;

    /** Not read anywhere in this file. NOTE(review): presumably a list of URLs to pre-cache; confirm against callers. */
    defaultCache?: Array<string>;

    /** Not read anywhere in this file. NOTE(review): looks like a test-mode flag; confirm against callers. */
    test?: boolean;

    /**
     * Host name used instead of the request's `Host` header when the middleware
     * builds the page URL (always with the `http://` scheme). It is also accepted
     * as an "own host" when URLs inside sitemap/robots responses are rewritten.
     */
    altHost?: string;
}
25
/**
 * Sub-directory settings that `SeoMiddleware.getHtml` forwards to the
 * rendering service as query-string parameters.
 */
export interface SeoSubDirOptions {
    /** When truthy, `&ld=subdir` is appended to the request. */
    subDir: boolean;

    /** When `subDir` is on and this is non-empty, sent URL-encoded as `&sdb=<value>`. */
    subDirBase: string;

    /** When `subDir` is on and this is truthy, `&sdo=true` is appended. */
    subDirOptional: boolean;
}
31
/**
 * Middleware that answers bot requests with a translated rendering of the page.
 *
 * For a handled request it captures the HTML written by the downstream handlers,
 * posts it to the rendering service at `SEO_ROOT`, and sends the service's answer
 * instead. Every rendered page is also written to a file (see `fullPathFromUrl`);
 * when `options.useCache` is set that file is replayed on later requests.
 * Requests matched by `shouldReplaceUrls` (sitemap / robots) are never sent to the
 * service: the absolute URLs inside them are rewritten locally with `getLink`.
 */
export class SeoMiddleware{
    constructor(private siteId: string, private options: SeoOptions, private subDirOptions: SeoSubDirOptions){}

    /**
     * Posts a page to the rendering service and resolves with the body it returns.
     *
     * @param url    Absolute URL of the page being rendered; also the cache key.
     * @param locale Value sent as the `el` query parameter.
     * @param html   Original markup, sent as the `html` field of the JSON body.
     * @returns The response body. Rejects on a transport error, a non-2xx status
     *          (`Status-<code>`), or a null/undefined body (`empty response`).
     */
    getHtml(url: string, locale: string, html?: string): Promise<string> {
        debug('getting from bablic', url, 'html:', !!html );
        let ld = '';
        if(this.subDirOptions.subDir) {
            ld = '&ld=subdir';
            if(this.subDirOptions.subDirBase)
                ld += '&sdb=' + encodeURIComponent(this.subDirOptions.subDirBase);
            if(this.subDirOptions.subDirOptional)
                ld += '&sdo=true';
        }
        // Every dynamic value is encoded so it cannot break or extend the query string.
        const endpoint = SEO_ROOT
            + "?site=" + encodeURIComponent(this.siteId)
            + "&el=" + encodeURIComponent(locale)
            + "&url=" + encodeURIComponent(url)
            + ld;
        return new Promise<string>((resolve, reject) => {
            request({
                url: endpoint,
                method: 'POST',
                json: {
                    html: html
                }
            }, (error:any, response:RequestResponse, body: any) => {
                if (error)
                    return reject(error);

                if (response.statusCode < 200 || response.statusCode >= 300)
                    return reject(new Error("Status-" + response.statusCode));

                if (body == null)
                    return reject(new Error('empty response'));

                debug('received translated html', response.statusCode);
                resolve(body);
                // Best-effort cache refresh: a failed write is logged, never propagated.
                fs.writeFile(fullPathFromUrl(url), body, error => error && console.error('Error saving to cache', error));
            });
        });
    }

    /**
     * Looks the page up in the on-disk cache.
     *
     * @param url      Absolute page URL (cache key).
     * @param skip     When true the cache is bypassed.
     * @param callback Called with no arguments when the cache is disabled or
     *                 skipped, with an error when the file cannot be stat'ed or
     *                 read, otherwise with the cached markup and the result of
     *                 `cacheValid` for the file.
     */
    getFromCache(url: string, skip: boolean, callback:(e?:Error, html?: string, isValid?: boolean) => void) {
        if (!this.options.useCache || skip)
            return callback();

        let file_path = fullPathFromUrl(url);
        fs.stat(file_path, (error:NodeJS.ErrnoException, file_stats: Stats) => {
            if (error)
                return callback(error);

            fs.readFile(file_path, (error:NodeJS.ErrnoException, data: Buffer) => {
                if (error)
                    return callback(error);
                callback(error, data.toString('utf8'), cacheValid(file_stats));
            });
        });
    };

    /**
     * Builds the request handler.
     *
     * The handler replaces `res.writeHead`, `res.write` and `res.end` so the body
     * produced by `next()` is buffered instead of sent. On `end` the buffered
     * markup is either URL-rewritten (sitemap / robots) or sent to `getHtml`, and
     * the result is flushed through the original methods. Non-HTML or non-2xx
     * responses are passed through untouched.
     *
     * When a cached copy exists but `cacheValid` reports it stale, the cached copy
     * is sent immediately and the downstream output is captured only to refresh
     * the cache ("cache only" mode); nothing more is written to the client.
     */
    middleware(){
        return (meta:SiteMeta, keywordsByLocale: KeywordMapper, reverseKeywordByLocale: KeywordMapper, req: ExtendedRequest, res: ExtendedResponse, next: () => void) => {

            let replaceUrls = shouldReplaceUrls(req);
            if (!shouldHandle(req) && !replaceUrls) {
                debug('ignored', req.url);
                return next();
            }

            // Drop the header so downstream handlers are not asked for a compressed body,
            // which could not be captured as text.
            delete req.headers['accept-encoding'];
            req.bablic.proxied = true;

            let protocol = req.headers['x-forwarded-proto'] || 'http';
            let my_url = protocol + "://" + req.headers.host + req.originalUrl;
            if (this.options.altHost)
                my_url = "http://" + this.options.altHost + req.originalUrl;


            this.getFromCache(my_url, replaceUrls, (e, html, isValid) => {
                // A cache error (typically a missing file) just means "no cached copy".
                let cache_only = false;
                if (html) {
                    debug('flushing from cache');
                    res.setHeader('Content-Type', 'text/html; charset=utf-8');
                    res.setHeader('Content-Language', req.bablic.locale);
                    res.write(html);
                    res.end();
                    if (isValid)
                        return;
                    // Stale copy already sent: keep going only to refresh the cache.
                    cache_only = true;
                }

                debug('overriding response');
                // Untyped alias used to install overrides whose signatures are looser
                // than the declared ServerResponse methods.
                const raw: any = <any>res;
                let _end = res.end;
                let _write = res.write;
                let _writeHead = res.writeHead;

                // Accepts writeHead(status), writeHead(status, headers) and
                // writeHead(status, reasonPhrase, headers). Only records the values;
                // the real head is sent by the original methods on flush.
                raw.writeHead = (status: number, reason?: any, header_map?: any) => {
                    res.statusCode = status;
                    if (typeof reason === 'string')
                        res.statusMessage = reason;
                    else if (reason != null)
                        header_map = reason;
                    if (header_map && typeof header_map === 'object') {
                        for (const key of Object.keys(header_map))
                            res.setHeader(key, header_map[key]);
                    }
                    return res;
                };

                const headers: {[name: string]: any} = {};
                let _getHeader;
                if (cache_only) {
                    // The real response is already sent, so header mutations are kept
                    // in a local map instead of touching it. `null` marks "removed".
                    _getHeader = res.getHeader;
                    res.setHeader = (name, value) => headers[name.toLowerCase().trim()] = value;
                    res.removeHeader = name => headers[name.toLowerCase().trim()] = null;
                    res.getHeader = name => {
                        let local = headers[name.toLowerCase().trim()];
                        if (local)
                            return local;
                        if (local === null)
                            return;
                        return _getHeader.call(res, name);
                    };
                }

                // Puts the original write/end/writeHead back; safe to call repeatedly.
                // `_getHeader` is intentionally left intact because the cache-only
                // `getHeader` override above keeps delegating to it.
                let restore_override = () => {
                    if (!_write || !_end || !_writeHead)
                        return;
                    debug('undo override');
                    res.write = _write;
                    res.end = _end;
                    res.writeHead = _writeHead;

                    _write = _end = _writeHead = null;
                };


                let head_checked = false;
                let is_html = null;
                // Captured body as raw bytes; decoded once at the end so a multi-byte
                // character split across two chunks is not corrupted.
                let chunks: Buffer[] = [];

                // Decides, once, whether this response should be captured.
                let check_head = () => {
                    if (head_checked)
                        return;
                    head_checked = true;

                    const content_type = res.getHeader('content-type');
                    is_html = typeof(content_type) !== 'undefined'
                        && (String(content_type).indexOf('text/html') > -1 || replaceUrls);
                    if (!is_html)
                        debug('not html', content_type);

                    if (res.statusCode < 200 || res.statusCode >= 300) {
                        debug('error response', res.statusCode);
                        is_html = false;
                    }
                    // In cache-only mode the real response has already ended, so the
                    // overrides must stay in place to swallow any further output.
                    if (!is_html && !cache_only)
                        restore_override();
                };

                // Normalises anything the application may write into bytes.
                const to_buffer = (chunk: any, encoding?: any): Buffer => {
                    if (Buffer.isBuffer(chunk))
                        return chunk;
                    if (chunk instanceof Uint8Array)
                        return Buffer.from(chunk);
                    return Buffer.from(String(chunk), typeof encoding === 'string' ? <any>encoding : 'utf8');
                };


                res.write = function(chunk?: any, encoding?: any, cb?: any) {
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return true;

                        debug('write original');
                        // check_head() has already restored the original method.
                        return res.write.apply(res, arguments);
                    }
                    // write(chunk, cb): the second argument is the callback, not an encoding.
                    if(typeof(encoding) == 'function') {
                        cb = encoding;
                        encoding = void(0);
                    }
                    if (chunk != null)
                        chunks.push(to_buffer(chunk, encoding));
                    if(cb)
                        cb();
                    // Always report "no back-pressure": nothing is buffered in the socket,
                    // and a falsy result would make piping sources wait for 'drain' forever.
                    return true;
                };


                const self = this;
                let alt_host = this.options.altHost;
                raw.end = function(chunk?: any, encoding?: any, cb?: any) {
                    check_head();
                    if (!is_html) {
                        if (cache_only)
                            return res;
                        debug('flush original');
                        restore_override();
                        return res.end.apply(res, arguments);
                    }

                    // Supported shapes: end(cb), end(chunk, cb), end(chunk, encoding, cb).
                    if (typeof(chunk) == 'function') {
                        cb = chunk;
                        chunk = encoding = void(0);
                    } else if(typeof(encoding) == 'function'){
                        cb = encoding;
                        encoding = void(0);
                    }

                    // The callback is deliberately not forwarded here; it is handed to
                    // the final write below so it runs exactly once.
                    if (chunk != null)
                        res.write(chunk, encoding);

                    // Captured output is assumed to be UTF-8 text.
                    let original_html = Buffer.concat(chunks).toString('utf8');
                    res.setHeader('Content-Language', req.bablic.locale);
                    if (replaceUrls) {
                        restore_override();
                        html = original_html.replace(detect_url, url => {
                            if (ignore_not_html_or_xml.test(url))
                                return url;
                            // Leave URLs pointing at other hosts alone.
                            if (url.indexOf(<string>req.headers.host) === -1 && (!alt_host || url.indexOf(alt_host) === -1))
                                return url;

                            let parsed = UrlParser.parse(url);
                            // translate URLs in sitemaps and such
                            const keywords = keywordsByLocale && keywordsByLocale[req.bablic.locale];
                            if (keywords && parsed.pathname)
                                parsed.pathname = parsed.pathname.split('/').map(part => keywords[part] || part).join('/');
                            return getLink(req.bablic.locale, parsed, meta);
                        });
                        res.setHeader('Content-Length', Buffer.byteLength(html));
                        res.write(html, cb);
                        return res.end();
                    }
                    self.getHtml(my_url, req.bablic.locale, original_html).then((data) => {
                        // Cache-only: getHtml has refreshed the cache file; the client
                        // was already answered from the stale copy.
                        if (cache_only)
                            return;
                        restore_override();
                        debug('flushing translated');
                        res.setHeader('Content-Length', Buffer.byteLength(data));
                        res.write(data, cb);
                        res.end();
                    }, (error) => {
                        if (cache_only)
                            return;
                        // Translation failed: fall back to the untranslated page.
                        restore_override();
                        console.error('[Bablic SDK] Error:', error);
                        debug('flushing original');
                        res.write(original_html, cb);
                        res.end();
                    });
                    return res;
                };
                return next();
            });
        };
    }
}
267
268
/**
 * Matches URLs whose file extension marks them as a static asset or document
 * rather than an HTML/XML page. The extension must be followed by the end of the
 * URL, a query string or a fragment, so that a dot-sequence elsewhere in the URL
 * (for example a host such as `www.docs-site.com` or a slug such as
 * `/node.js-guide`) is not mistaken for a file extension.
 */
const ignore_not_html_or_xml = /\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)(?=$|[?#])/i;

/**
 * Finds absolute http(s)/ftp/file URLs inside free text. Global, so it can be
 * used with `String.prototype.replace` to visit every URL; the final character
 * class excludes trailing punctuation such as `.`, `,` and `;`.
 */
const detect_url = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
272
/** Endpoint that `SeoMiddleware.getHtml` posts pages to; replaceable via `setRenderServer`. */
let SEO_ROOT = 'http://seo.bablic.com/api/engine/seo';

/**
 * Points the module at a different rendering endpoint.
 *
 * @param url New endpoint; any truthy value is accepted as-is.
 * @throws Error when `url` is empty or otherwise falsy.
 */
export function setRenderServer(url: string) {
    if (url) {
        SEO_ROOT = url;
        return;
    }
    throw new Error("Must be a valid URL");
}
281
/**
 * Returns the MD5 digest of `data` as a lowercase hex string.
 * Used here only to derive a file name, not for anything security-sensitive.
 */
function hash(data){
    const md5 = crypto.createHash('md5');
    md5.update(data);
    return md5.digest('hex');
}
285
/**
 * Maps a page URL to the path of its cache file: the OS temp directory plus
 * the `hash` of the URL as the file name.
 */
function fullPathFromUrl(url) {
    const cache_dir = OS.tmpdir();
    const file_name = hash(url);
    return `${cache_dir}/${file_name}`;
}
/**
 * Tells whether a cache file is still fresh.
 *
 * @param file_stats Object exposing the file's `mtime` as a `Date` (e.g. `fs.Stats`).
 * @param max_age_ms Maximum age in milliseconds; defaults to 30 minutes.
 * @returns true while the current time is strictly before `mtime + max_age_ms`.
 */
function cacheValid(file_stats, max_age_ms = 30 * 60 * 1000) {
    const expires_at = file_stats.mtime.getTime() + max_age_ms;
    return Date.now() < expires_at;
}
295
/** Extensions of static files and documents; anchored to the end of the path. */
const filename_tester = /\.(js|css|jpg|jpeg|png|mp3|avi|mpeg|bmp|wav|pdf|doc|xml|docx|xlsx|xls|json|kml|svg|eot|woff|woff2)$/i;

/**
 * Tells whether the request targets a static file that should not be translated.
 *
 * Only the extension at the end of the path is considered: the query string and
 * fragment are stripped first, so `/blog/node.js-tips` or `/search?q=a.pdf` are
 * not mistaken for files while `/app.js?v=3` still is.
 *
 * @param req Request-like object; only `req.url` is read.
 */
function ignorable(req) {
    const url = typeof req.url === 'string' ? req.url : '';
    const path = url.split(/[?#]/, 1)[0];
    return filename_tester.test(path);
}
/** Case-insensitive substrings that identify a crawler's User-Agent. */
const google_tester = /bot|crawler|baiduspider|facebook|twitter|80legs|google|seo/i;

/**
 * Tells whether the request's `user-agent` header looks like a crawler.
 *
 * @param req Request-like object; only `req.headers['user-agent']` is read.
 */
function isBot(req) {
    const user_agent = req.headers['user-agent'];
    return google_tester.test(user_agent);
}
304
/**
 * A request is handled when it comes from a crawler (`isBot`) and does not
 * target a static file (`ignorable`).
 */
function shouldHandle(req) {
    if (!isBot(req))
        return false;
    return !ignorable(req);
}
308
/**
 * Tells whether the request URL mentions `sitemap` or `robots` (any case).
 * For such requests the middleware rewrites the URLs found in the response.
 */
function shouldReplaceUrls(req) {
    const sitemap_or_robots = /sitemap|robots/i;
    return sitemap_or_robots.test(req.url);
}
312