1 | import * as _ from "lodash";
|
2 | import * as cookie from "cookie";
|
3 | import * as fs from "fs";
|
4 | import * as moment from "moment";
|
5 | import * as OS from "os";
|
6 | import * as request from "request";
|
7 | import * as Debug from "debug";
|
8 | import * as url_parser from "url";
|
9 |
|
10 | import {SeoMiddleware, SeoOptions} from "./seo";
|
11 | import {
|
12 | ExtendedRequest, ExtendedResponse, getLocaleByURL, getLink, SiteMeta, KeywordMapper, LastModifiedByLocale,
|
13 | Middleware
|
14 | } from "./common";
|
15 | import {IncomingMessage, ServerResponse} from "http";
|
16 |
|
// Logger created with the "bablic:seo" namespace; used for all diagnostic output in this module.
const debug = Debug("bablic:seo");

// Base URL used when requesting site data from Bablic.
const BABLIC_ROOT = "https://www.bablic.com";
|
20 |
|
/**
 * Escapes every regular-expression metacharacter in `str` so the result can be
 * embedded in a `RegExp` source and match the input literally.
 *
 * @param str Text to be matched literally.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \ -` prefixed by a backslash.
 */
function escapeRegex(str: string): string {
    // `*` is included: leaving it out would let it act as a quantifier in the built pattern.
    return str.replace(/[.*+?^${}()|[\]\\-]/g, "\\$&");
}
|
24 |
|
/**
 * Options accepted by the `BablicSDK` constructor.
 */
export interface BablicOptions {
    /** Bablic site identifier. Required: the constructor throws when it is missing. */
    siteId: string;
    /** When set, its protocol and hostname are used to build the absolute URL passed to `altTags`. */
    rootUrl?: string;
    /** Passed through to `getLocaleByURL` during locale detection. */
    locale?: string;
    /** When set, used as the request locale instead of running locale detection. */
    forceLocale?: string;
    /** Forces "subdir" locale detection and enables stripping of the locale prefix from request URLs. */
    subDir?: boolean;
    /** Optional path prefix that may precede the locale segment when `subDir` is on. */
    subDirBase?: string;
    /** Forwarded to the SEO middleware, to `getLink`, and emitted into the injected snippet. */
    subDirOptional?: boolean;
    /** Invoked once site meta and snippet are available (immediately if both are supplied in options). */
    onReady?: () => void;
    /** Options forwarded to `SeoMiddleware`. */
    seo?: SeoOptions;
    /** Per-locale folder mapping, forwarded to `getLocaleByURL` and `getLink`. */
    folders?: { [locale: string]: string };
    /** Pre-loaded site meta; together with `snippet` it lets the SDK skip loading from cache/server. */
    meta?: SiteMeta;
    /** Pre-loaded snippet markup. */
    snippet?: string;
    /** URL keyword translations: original keyword -> locale -> translated keyword. */
    keywords?: {
        [urlKeyword: string]: { [locale: string]: string };
    };
    /** Copied onto the SDK instance and handed to the SEO middleware. */
    lastModified?: LastModifiedByLocale;
}
|
/**
 * Site description as parsed from the Bablic site API response and as
 * serialized to / read back from the local snippet cache file.
 */
export interface SiteData {
    /** Site id; `saveSiteMeta` stamps it before caching and `loadSiteMeta` compares it to the configured id. */
    id?: string;
    /** When present on a cached object, `loadSiteMeta` discards the cache and refetches. */
    error?: string;
    /** Snippet markup exposed through `res.locals.bablic`. */
    snippet: string;
    /** Site meta used for locale detection and link generation. */
    meta: SiteMeta;
    /** URL keyword translations: original keyword -> locale -> translated keyword. */
    keywords?: {
        [urlKeyword: string]: { [locale: string]: string };
    };

    /** Forwarded to the SEO middleware. */
    lastModified: LastModifiedByLocale;
}
|
61 |
|
/**
 * Fallback option values; the constructor fills any option the caller left
 * undefined from this object via `_.defaultsDeep`.
 */
const Defaults: BablicOptions = {
    siteId: null,
    rootUrl: null,
    locale: null,
    subDir: false,
    subDirBase: "",
    subDirOptional: false,
    onReady: null,
    seo: {
        useCache: true,
        defaultCache: [],
        test: false,
        // SEO cache directory lives under the OS temp directory.
        cacheDir: `${OS.tmpdir()}/bpCache`,
    },
    folders: null,
};
|
78 |
|
/**
 * Legacy spellings still accepted for top-level options.
 * Maps each current option name to the older names the constructor also reads.
 */
const BackwardCompOptions = {
    siteId: [
        "site_id",
    ],
    rootUrl: [
        "root_url",
    ],
    subDir: [
        "subdir",
        "sub_dir",
    ],
    subDirBase: [
        "subdir_base",
    ],
    subDirOptional: [
        "subdir_optional",
    ],
};
|
/**
 * Legacy spellings still accepted inside the `seo` option object.
 * Maps each current SEO option name to the older names the constructor also reads.
 */
export const BackwardCompSEOOptions = {
    useCache: [
        "use_cache",
    ],
    defaultCache: [
        "default_cache",
    ],
};
|
90 |
|
/**
 * Connect-style middleware that detects the request locale, exposes the Bablic
 * snippet and alternate-link tags through `res.locals.bablic`, rewrites
 * translated URL keywords back to their original form, and delegates
 * non-original locales to the SEO middleware.
 *
 * Site meta and snippet come from the constructor options, from a JSON cache
 * file in the OS temp directory, or from the Bablic site API.
 */
export class BablicSDK {
    /** Site meta; `null` until supplied in options, read from the cache file, or fetched. */
    public meta: SiteMeta = null;
    /** Last-modified data handed to the SEO middleware. */
    public lastModified: LastModifiedByLocale = null;
    /** Snippet markup exposed through `res.locals.bablic`. */
    public snippet = "";
    private options: BablicOptions;
    /** Lazily built matcher for the locale prefix in sub-directory mode; reset by `saveSiteMeta`. */
    private LOCALE_REGEX: RegExp;
    private seoMiddleware: (meta: SiteMeta, lastModified: LastModifiedByLocale, keywordsByLocale: KeywordMapper, reverseKeywordByLocale: KeywordMapper, req: ExtendedRequest, res: ExtendedResponse, next: () => void) => void;

    /** locale -> (original keyword -> translated keyword). */
    private keywordsByLocale: KeywordMapper = null;
    /** locale -> (translated keyword -> original keyword). */
    private reverseKeywordByLocale: KeywordMapper = null;
    private seoHandler: SeoMiddleware;
    /** Bound middleware entry point; safe to pass around without losing `this`. */
    public handle: Middleware = (req, res, next) => this.handler(req, res, next);

    /**
     * Normalizes legacy option names, applies defaults, creates the SEO
     * middleware, and starts loading site meta unless both `meta` and
     * `snippet` were supplied.
     *
     * Note: `options` is mutated (legacy aliases are copied and defaults are
     * merged into it).
     *
     * @param options SDK configuration; `siteId` (or legacy `site_id`) is required.
     * @throws Error when no site id is provided.
     */
    constructor(options: BablicOptions) {
        const generalOptions = options as any;
        BablicSDK.applyLegacyAliases(generalOptions, BackwardCompOptions);
        if (options.seo) {
            BablicSDK.applyLegacyAliases(generalOptions.seo, BackwardCompSEOOptions);
        }

        if (!options.siteId) {
            throw new Error("Middleware requires a site_id");
        }

        this.options = _.defaultsDeep(options, Defaults);
        this.seoHandler = new SeoMiddleware(this.options.siteId, this.options.seo, {
            subDir: this.options.subDir,
            subDirBase: this.options.subDirBase,
            subDirOptional: this.options.subDirOptional,
        });
        this.seoMiddleware = this.seoHandler.middleware();

        if (this.options.meta) {
            this.meta = this.options.meta;
            this.processKeywords(this.options.keywords);
        }
        if (this.options.snippet) {
            this.snippet = this.options.snippet;
        }
        this.lastModified = this.options.lastModified;

        if (this.meta && this.snippet) {
            // Everything needed was supplied up front: no cache or network access.
            if (this.options.onReady) {
                this.options.onReady();
            }
            return;
        }
        this.loadSiteMeta(() => {
            if (this.options.onReady) {
                this.options.onReady();
            }
        });
    }

    /**
     * Fetches the site data from the Bablic API and stores it via `saveSiteMeta`.
     *
     * @param cbk Called exactly once: with an error when the request fails, the
     *            body is empty, or the body cannot be parsed/saved; with no
     *            argument on success.
     */
    public getSiteMeta(cbk: (e?: Error) => void) {
        debug("getting from bablic");
        request({
            method: "GET",
            url: `${BABLIC_ROOT}/api/v1/site/${this.options.siteId}?channel_id=node`,
        }, (error, response, body) => {
            if (error) {
                return cbk(error);
            }
            if (!body) {
                return cbk(new Error("empty response"));
            }

            // `saved` separates "parsing/saving failed" (report to the caller)
            // from "the callback itself threw" (log only, never call it twice).
            let saved = false;
            try {
                const data: SiteData = typeof(body) === "string" ? JSON.parse(body) : body;
                debug("data:", data);
                this.saveSiteMeta(data);
                saved = true;
                cbk();
            } catch (e) {
                debug(e);
                if (!saved) {
                    // Previously the callback was never invoked here, leaving callers waiting forever.
                    cbk(e);
                }
            }
        });
    }

    /**
     * Applies freshly fetched site data to this instance and writes it to the
     * cache file. A write failure is logged to the console and otherwise ignored.
     *
     * @param data Site data; its `id` is overwritten with the configured site id.
     */
    public saveSiteMeta(data: SiteData) {
        const {snippet, meta, lastModified} = data;
        this.snippet = snippet;
        this.meta = meta;
        this.lastModified = lastModified;
        this.processKeywords(data.keywords);
        // Locale keys may have changed; force the prefix regex to be rebuilt.
        this.LOCALE_REGEX = null;
        data.id = this.options.siteId;
        fs.writeFile(this.snippetUrl(), JSON.stringify(data), (error) => {
            if (error) {
                console.error("Error saving snippet to cache", error);
            }
        });
    }

    /** @returns Path of the per-site cache file inside the OS temp directory. */
    public snippetUrl() {
        return `${OS.tmpdir()}/snippet.${this.options.siteId}`;
    }

    /**
     * Resolves the locale for a request. A `bablic-locale` request header wins;
     * otherwise detection is delegated to `getLocaleByURL`.
     *
     * Requires `this.meta` to be loaded.
     *
     * @param req Incoming request.
     * @returns The detected locale.
     */
    public getLocale(req: ExtendedRequest): string {
        if (req.headers["bablic-locale"]) {
            return req.headers["bablic-locale"] as string;
        }

        const auto = this.meta.autoDetect;
        const defaultLocale = this.meta.default;
        const customUrls = this.meta.customUrls;
        // Copy before appending so the meta object itself is never mutated.
        const localeKeys = (this.meta.localeKeys || []).slice();
        localeKeys.push(this.meta.original);
        let localeDetection = this.meta.localeDetection;
        if (this.options.subDir) {
            localeDetection = "subdir";
        }
        return getLocaleByURL(
            url_parser.parse(getCurrentUrl(req)),
            localeDetection,
            customUrls,
            detectLocaleFromCookie(req, this.meta),
            defaultLocale,
            auto ? detectLocaleFromHeader(req) : "",
            false,
            this.options.locale,
            this.options.subDirBase,
            this.options.folders,
            localeKeys,
        );
    }

    /**
     * Loads site data from the cache file, falling back to the server when the
     * file is missing, unreadable, belongs to another site, or holds an error.
     * After a successful cache load, the cache is refreshed in the background
     * if the file is older than four hours.
     *
     * @param cbk Called once when data is available (or the fallback fetch finished).
     */
    public loadSiteMeta(cbk: (e?: Error) => void) {
        debug("loading meta from file");
        fs.readFile(this.snippetUrl(), (error, data) => {
            if (error) {
                debug("no local file, getting from server");
                return this.getSiteMeta(cbk);
            }

            debug("reading from temp file");
            // `loaded` marks that the cache was applied and `cbk` already ran,
            // so a throwing callback does not trigger a second invocation.
            let loaded = false;
            try {
                const object: SiteData = JSON.parse(data.toString("utf8"));
                if (object.id != this.options.siteId || object.error) {
                    debug("not of this site id");
                    return this.getSiteMeta(cbk);
                }
                this.meta = object.meta;
                this.snippet = object.snippet;
                this.lastModified = object.lastModified;
                this.processKeywords(object.keywords);
                loaded = true;
                cbk();
            } catch (e) {
                debug(e);
                if (!loaded) {
                    return this.getSiteMeta(cbk);
                }
            }

            debug("checking snippet time");
            fs.stat(this.snippetUrl(), (e, stats) => {
                if (e) {
                    // `cbk` has already been called above; calling it again
                    // here would fire `onReady` twice.
                    return debug("could not stat snippet cache", e);
                }
                const lastWrite = moment(stats.mtime.getTime());
                if (lastWrite > moment().subtract(4, "hours")) {
                    return debug("snippet cache is good");
                }
                debug("refresh snippet");
                this.getSiteMeta(() => debug("refreshed snippet"));
            });
        });
    }

    /**
     * Handles a refresh notification: starts re-fetching the site data and
     * answers "OK" immediately without waiting for the fetch.
     */
    public handleBablicCallback(req: ExtendedRequest, res: ExtendedResponse) {
        this.getSiteMeta(() => debug("site snippet refreshed"));
        res.end("OK");
    }

    /**
     * Builds the link for `url` in the given locale by delegating to `getLink`
     * with `returnFull: true`.
     *
     * @param locale Target locale.
     * @param url URL to convert.
     */
    public getLink(locale: string, url: string): string {
        const parsed = url_parser.parse(url);
        return getLink(locale, parsed, this.meta, {
            subDir: this.options.subDir,
            subDirBase: this.options.subDirBase,
            subDirOptional: this.options.subDirOptional,
            folders: this.options.folders,
            returnFull: true,
        });
    }

    /**
     * Renders `<link rel="alternate">` tags for every site locale except
     * `locale`. The original locale is emitted with `hreflang="x-default"`.
     *
     * @param url URL of the current page.
     * @param locale Locale of the current page (excluded from the output).
     * @returns Concatenated tag markup.
     */
    public altTags(url: string, locale: string) {
        const locales = this.meta.localeKeys || [];
        const tags = _(locales)
            .concat([this.meta.original])
            .without(locale)
            .map((l: string) => `<link rel="alternate" href="${this.getLink(l, url)}" hreflang="${l == this.meta.original ? "x-default" : l}">`)
            .valueOf() as string[];
        return tags.join("");
    }

    /** Purges the SEO cache through the SEO handler; resolves immediately when there is none. */
    public purgeCache(): Promise<void> {
        if (!this.seoHandler) {
            return Promise.resolve();
        }
        return this.seoHandler.purgeCache();
    }

    /**
     * Copies values supplied under legacy option names onto their current names.
     * A current name that already holds a truthy value is left alone; when
     * several aliases are set, the last one listed wins.
     *
     * @param target Options object to normalize in place.
     * @param aliases Map of current option name -> accepted legacy names.
     */
    private static applyLegacyAliases(target: any, aliases: {[key: string]: string[]}): void {
        Object.keys(aliases).forEach((key) => {
            if (target[key]) {
                return;
            }
            aliases[key].forEach((alt) => {
                if (target[alt]) {
                    target[key] = target[alt];
                }
            });
        });
    }

    /**
     * Replaces each `/`-separated path segment of `url` that is an own key of
     * `mapping` with the mapped value. The query string and the extension
     * (text after the last `.` of the path) are preserved.
     *
     * @returns The rewritten URL, or `null` when no segment changed.
     */
    private replacePathKeywords(url: string, mapping: {[keyword: string]: string}): string {
        const urlParts = url.split("?");
        const pathname = urlParts[0];
        const pathParts = pathname.split(".");
        const hasExt = pathParts.length > 1;
        const ext = hasExt ? "." + pathParts[pathParts.length - 1] : "";
        const pathNoExt = hasExt ? pathParts.slice(0, pathParts.length - 1).join(".") : pathname;
        const replaced = pathNoExt
            .split("/")
            // Own-property check: segments such as "constructor" or "toString"
            // must not resolve to members inherited from Object.prototype.
            .map((segment) => (Object.prototype.hasOwnProperty.call(mapping, segment) && mapping[segment]) || segment)
            .join("/");
        if (replaced == pathNoExt) {
            return null;
        }
        urlParts[0] = replaced + ext;
        return urlParts.join("?");
    }

    /** Maps translated keywords in `url` back to the originals; `null` when nothing changed. */
    private generateOriginalPath(url: string, locale: string): string {
        return this.replacePathKeywords(url, this.reverseKeywordByLocale[locale]);
    }

    /** Maps original keywords in `url` to their translations for `locale`; `null` when nothing changed. */
    private generateTranslatedPath(url: string, locale: string): string {
        return this.replacePathKeywords(url, this.keywordsByLocale[locale]);
    }

    /**
     * Middleware implementation behind `handle`.
     *
     * @param _req Incoming request (treated as `ExtendedRequest`).
     * @param _res Server response (treated as `ExtendedResponse`).
     * @param next Continuation invoked when the request is not answered here.
     */
    private handler(_req: IncomingMessage, _res: ServerResponse, next: () => void) {
        const req = _req as ExtendedRequest;
        const res = _res as ExtendedResponse;
        if (!req.originalUrl) {
            req.originalUrl = req.url;
        }
        if ((req.originalUrl == "/_bablicCallback" && req.method == "POST") || req.headers["x-bablic-refresh"]) {
            debug("Redirecting to Bablic callback");
            return this.handleBablicCallback(req, res);
        }
        res.setHeader("x-bablic-id", this.options.siteId);

        // An empty key list would produce a pattern that matches an empty
        // locale and strips the leading "/" from every URL, so require at
        // least one key. Keys are escaped like subDirBase.
        const localeKeys = this.meta && this.meta.localeKeys;
        if (!this.LOCALE_REGEX && this.options.subDir && localeKeys && localeKeys.length > 0) {
            this.LOCALE_REGEX = RegExp("^(?:" + escapeRegex(this.options.subDirBase) + ")?\\/(" + localeKeys.map(escapeRegex).join("|") + ")\\b");
        }

        if (!this.meta) {
            // Site data has not arrived yet: expose inert placeholders and pass through.
            debug("not loaded yet", req.originalUrl);
            req.bablic = {
                locale: "",
            };
            extendResponseLocals(res, {
                bablic: {
                    locale: "",
                    snippet: "",
                    snippetBottom: "<!-- Bablic Footer OFF -->",
                    snippetTop: "<!-- Bablic Head OFF -->",
                },
            });

            return next();
        }

        const locale = req.forceLocale || this.options.forceLocale || this.getLocale(req);

        req.bablic = {
            locale,
            proxied: false,
        };

        let _snippet = this.snippet;

        if (this.meta.original == locale) {
            _snippet = _snippet.replace("<script", "<script async");
        }

        if (this.options.subDir && this.meta.localeKeys) {
            if (this.LOCALE_REGEX) {
                // Strip the locale prefix so downstream routing sees the original path.
                req.url = req.url.replace(this.LOCALE_REGEX, "");
                req.originalUrl = req.originalUrl.replace(this.LOCALE_REGEX, "");
            }
            _snippet = `<script>var bablic=bablic||{};bablic.localeURL="subdir";bablic.subDirBase="${this.options.subDirBase}";bablic.subDirOptional=${!!this.options.subDirOptional};</script>` + _snippet;
        }

        if (this.reverseKeywordByLocale && this.reverseKeywordByLocale[locale]) {
            const original = this.generateOriginalPath(req.url, locale);

            if (original) {
                // Translated keywords in the URL: route as if the original path was requested.
                req.url = original;
                req.originalUrl = this.generateOriginalPath(req.originalUrl, locale) || req.originalUrl;
            } else {
                // Original keywords under a translated locale: redirect to the translated path.
                const translated = this.generateTranslatedPath(req.originalUrl, locale);
                if (translated) {
                    res.writeHead(301, {location: translated});
                    return res.end();
                }
            }
        }

        let fullUrl = req.originalUrl;
        if (this.options.rootUrl) {
            const rootParsed = url_parser.parse(this.options.rootUrl);
            fullUrl = rootParsed.protocol + "//" + rootParsed.hostname + req.originalUrl;
        }

        extendResponseLocals(res, {
            bablic: {
                locale,
                snippet: _snippet,
                snippetBottom: "",
                // NOTE(review): the trailing marker also says "start"; left unchanged in case consumers match on it.
                snippetTop: "<!-- start Bablic Head -->" + this.altTags(fullUrl, locale) + _snippet + "<!-- start Bablic Head -->",
            },
        });

        if (!this.seoMiddleware) {
            return next();
        }

        if (locale == this.meta.original) {
            debug("ignored same language", req.url);
            return next();
        }
        return this.seoMiddleware(this.meta, this.lastModified, this.keywordsByLocale, this.reverseKeywordByLocale, req, res, next);
    }

    /**
     * Builds the per-locale keyword maps (`keywordsByLocale` and its inverse
     * `reverseKeywordByLocale`) for every locale in `meta.localeKeys`.
     * Does nothing when `keywords` is falsy.
     *
     * @param keywords original keyword -> locale -> translated keyword.
     */
    private processKeywords(keywords: {[keyword: string]: {[locale: string]: string}}) {
        if (!keywords) {
            return;
        }

        this.keywordsByLocale = {};
        this.reverseKeywordByLocale = {};
        // Tolerate meta without locale keys instead of throwing.
        const localeKeys: string[] = (this.meta && this.meta.localeKeys) || [];
        localeKeys.forEach((locale) => {
            const proper: {[keyword: string]: string} = {};
            const reverse: {[keyword: string]: string} = {};
            for (const keyword in keywords) {
                const translation = keywords[keyword][locale];
                if (!translation) {
                    continue;
                }
                proper[keyword] = translation;
                reverse[translation] = keyword;
            }
            this.keywordsByLocale[locale] = proper;
            this.reverseKeywordByLocale[locale] = reverse;
        });
    }
}
|
443 |
|
/**
 * Publishes `context` on the response's `locals`.
 *
 * - `res.locals` is a function: it is called with `context`.
 * - `res.locals` is any other truthy value: `context` is merged into it.
 * - otherwise: `res.locals` is set to `context` itself.
 *
 * @param res Response whose locals are extended.
 * @param context Values to expose.
 */
function extendResponseLocals(res: ExtendedResponse, context: {}) {
    if (typeof(res.locals) == "function") {
        res.locals(context);
        return;
    }
    if (!res.locals) {
        res.locals = context;
        return;
    }
    _.extend(res.locals, context);
}
|
453 |
|
/**
 * Derives a locale from the first entry of the `Accept-Language` header.
 *
 * Any parameter on that entry (for example `;q=0.9`) and surrounding
 * whitespace are dropped, and the first `-` is replaced by `_`
 * (`en-US` -> `en_US`).
 *
 * @param req Incoming request.
 * @returns The locale, or `""` when the header is absent.
 */
function detectLocaleFromHeader(req: ExtendedRequest): string {
    const header = req.headers["accept-language"];
    if (!header) {
        return "";
    }
    const firstEntry = (header as string).split(",")[0];
    // Keep only the language range: a weight such as ";q=0.9" is not part of the locale.
    const languageRange = firstEntry.split(";")[0].trim();
    return languageRange.replace("-", "_");
}
|
465 |
|
/**
 * Reads the locale stored in the `bab_locale` cookie.
 *
 * Uses `req.cookies` when it is already populated, otherwise parses the raw
 * `Cookie` header. The cookie value is returned when it is one of
 * `meta.localeKeys`; failing that, the first locale key whose first two
 * characters equal those of the cookie value is returned.
 *
 * @param req Incoming request.
 * @param meta Site meta providing `localeKeys`.
 * @returns The matched locale, or `""` when nothing matches.
 */
function detectLocaleFromCookie(req: ExtendedRequest, meta: SiteMeta) {
    const rawHeader = req.headers.cookie;
    if (!rawHeader || !meta.localeKeys) {
        return "";
    }

    const jar = (req as any).cookies || cookie.parse(rawHeader as string);
    const stored = jar ? jar.bab_locale : undefined;
    if (!stored) {
        return "";
    }

    if (meta.localeKeys.indexOf(stored) > -1) {
        return stored;
    }

    // No exact match: fall back to the first key sharing the two-character prefix.
    for (let i = 0; i < meta.localeKeys.length; i++) {
        const candidate = meta.localeKeys[i];
        if (candidate[0] == stored[0] && candidate[1] == stored[1]) {
            return candidate || "";
        }
    }
    return "";
}
|
490 |
|
/**
 * Builds an absolute URL for the request from its `Host` header and
 * `originalUrl`. The scheme is always `http`.
 *
 * @param req Request providing `headers.host` and `originalUrl`.
 */
function getCurrentUrl(req) {
    const host = req.headers.host;
    const path = req.originalUrl;
    return `http://${host}${path}`;
}
|
494 |
|
495 |
|
496 |
|