/**
 * Regular expressions used to decide whether a hostname may be recorded in
 * the hostname cache (see `Detector#trackDetectedApps`).
 */
const validation = {
  // Requires at least one dot followed by a 2-6 letter TLD, optionally
  // preceded by a 2-3 letter second-level part (e.g. "co.uk").
  hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,

  // Hostnames containing any of these fragments are never recorded.
  // NOTE(review): the pattern is unanchored, so it matches substrings too
  // (e.g. "latest.example.com" contains "test.") — confirm this is intended.
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/,
};
|
5 |
|
6 |
|
7 |
|
/**
 * Wrap a value in an array unless it already is one.
 *
 * `Array.isArray` is used instead of `instanceof Array` so that arrays created
 * in another realm (iframe, vm context) are recognised and not double-wrapped.
 *
 * @param {*} value - Any value.
 * @returns {Array} `value` itself when it is an array, otherwise `[value]`.
 */
function asArray(value) {
  return Array.isArray(value) ? value : [value];
}
|
12 |
|
13 |
|
14 |
|
/**
 * Run `iterator` once per item, each call deferred by a 1 ms timer, and
 * collect the results.
 *
 * An exception thrown by `iterator` rejects the returned promise; without the
 * try/catch it would escape the timer callback as an uncaught exception and
 * leave the promise pending forever.
 *
 * @param {Array|null|undefined} iterable - Items to process; a falsy value is
 *   treated as an empty list.
 * @param {Function} iterator - Called with each item.
 * @returns {Promise<Array>} Resolves with the iterator results in item order.
 */
function asyncForEach(iterable, iterator) {
  const tasks = (iterable || []).map((item) => new Promise((resolve, reject) => {
    setTimeout(() => {
      try {
        resolve(iterator(item));
      } catch (error) {
        reject(error);
      }
    }, 1);
  }));

  return Promise.all(tasks);
}
|
20 |
|
21 |
|
22 |
|
/**
 * Mark an application as detected by `pattern` and record the confidence and,
 * when the pattern carries a version template, the resolved version.
 *
 * The version template may contain back-references (`\1`, `\2`, ...) to the
 * pattern's capture groups and ternaries of the form `\1?yes:no`, which
 * resolve to `yes` when group 1 matched and to `no` otherwise.
 *
 * @param {Object} app - Application record; `detected`, `confidence` and
 *   possibly `version` are mutated.
 * @param {Object} pattern - Parsed pattern with `regex` and optional
 *   `confidence` and `version` attributes.
 * @param {string} type - Source of the match (e.g. 'html', 'headers').
 * @param {string} value - The text the pattern matched against.
 * @param {string} [key] - Sub-key of the source (header name, cookie name, ...).
 */
function addDetected(app, pattern, type, value, key) {
  app.detected = true;

  // One confidence entry per (source, key, regex) combination.
  const confidenceId = `${type} ${key ? `${key} ` : ''}${pattern.regex}`;
  app.confidence[confidenceId] = pattern.confidence === undefined
    ? 100
    : parseInt(pattern.confidence, 10);

  if (!pattern.version) {
    return;
  }

  const matches = pattern.regex.exec(value);
  if (!matches) {
    return;
  }

  let { version } = pattern;

  matches.forEach((match, i) => {
    // Resolve a ternary such as "\1?a:b" for this capture group.
    const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version);
    if (ternary && ternary.length === 3) {
      const branch = match ? ternary[1] : ternary[2];
      // Function replacer: the inserted text must not be scanned for "$&", "$1", ...
      version = version.replace(ternary[0], () => branch);
    }

    // Substitute plain back-references with the captured text, verbatim.
    version = version.trim()
      .replace(new RegExp(`\\\\${i}`, 'g'), () => match || '');
  });

  if (version) {
    app.version = version;
  }
}
|
61 |
|
/**
 * Remove from `apps` every application that is listed in the `excludes`
 * property of any application in `apps` or `detected`.
 *
 * @param {Object} apps - Applications keyed by name; mutated in place.
 * @param {Object} [detected] - Additional applications keyed by name whose
 *   `excludes` are honoured as well; may be undefined.
 */
function resolveExcludes(apps, detected) {
  const candidates = Object.assign({}, apps, detected);
  const excluded = new Set();

  Object.keys(candidates).forEach((appName) => {
    const { excludes } = candidates[appName].props;
    if (excludes) {
      asArray(excludes).forEach((name) => excluded.add(name));
    }
  });

  Object.keys(apps).forEach((appName) => {
    if (excluded.has(appName)) {
      delete apps[appName];
    }
  });
}
|
/**
 * Detection state of a single application for one analysed page.
 */
class Application {
  /**
   * @param {string} name - Application name.
   * @param {Object} props - Application definition (patterns, implies, ...).
   * @param {*} [detected] - Truthy when the application is already known to
   *   be present (e.g. because another application implies it).
   */
  constructor(name, props, detected) {
    this.confidence = {};
    this.confidenceTotal = 0;
    this.detected = Boolean(detected);
    this.excludes = [];
    this.name = name;
    this.props = props;
    this.version = '';
  }

  /**
   * Sum all recorded confidence values, cap the sum at 100 and store it in
   * `confidenceTotal`.
   *
   * @returns {number} The capped total.
   */
  getConfidence() {
    const total = Object.keys(this.confidence)
      .reduce((sum, id) => sum + this.confidence[id], 0);

    this.confidenceTotal = Math.min(total, 100);

    return this.confidenceTotal;
  }
}
|
/**
 * Matches page data (URL, HTML, meta tags, scripts, cookies, headers and
 * JavaScript probe results) against application patterns and reports the
 * detected applications through the configured driver.
 *
 * Driver members used by this class: `log`, `displayApps`, `getRobotsTxt`,
 * `ping` and `document`.
 */
class Detector {
  constructor() {
    this.apps = {};
    this.categories = {};
    this.driver = {};
    this.jsPatterns = {};
    this.detected = {};
    this.hostnameCache = {};
    this.adCache = [];
    this.config = {
      websiteURL: '',
      twitterURL: '',
      githubURL: '',
    };
  }

  /**
   * Forward a message to the driver's logger, if the driver provides one.
   *
   * @param {string} message - Text to log.
   * @param {string} [source] - Origin of the message; defaults to ''.
   * @param {string} [type] - Severity; defaults to 'debug'.
   */
  log(message, source, type) {
    if (this.driver.log) {
      this.driver.log(message, source || '', type || 'debug');
    }
  }

  /**
   * Analyse one page and hand the detected applications to the driver.
   *
   * @param {Object} url - Parsed URL; `canonical`, `hostname` and `protocol`
   *   are read (see `parseUrl`).
   * @param {Object} data - Page data; any of `html`, `scripts`, `cookies`,
   *   `headers` and `js` may be present.
   * @param {*} [context] - Passed through untouched to `driver.displayApps`.
   * @returns {Promise<undefined>} Settles once the driver has been notified;
   *   rejects if any analysis step or driver call throws.
   */
  analyze(url, data, context) {
    const apps = {};
    const promises = [];
    const startTime = new Date();
    const {
      scripts,
      cookies,
      headers,
      js,
    } = data;

    // Anything that is not a string is treated as "no HTML".
    const html = typeof data.html === 'string' ? data.html : '';

    if (this.detected[url.canonical] === undefined) {
      this.detected[url.canonical] = {};
    }

    let metaTags = [];
    let language = null;

    if (html) {
      const langMatch = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);
      language = langMatch ? langMatch[1] : null;

      metaTags = html.match(/<meta[^>]+>/ig) || [];
    }

    const cached = this.detected[url.canonical];

    Object.keys(this.apps).forEach((appName) => {
      // Reuse the record cached for this URL so earlier findings accumulate.
      const app = cached[appName] || new Application(appName, this.apps[appName]);
      apps[appName] = app;

      promises.push(this.analyzeUrl(app, url));

      if (html) {
        promises.push(this.analyzeHtml(app, html));
        promises.push(this.analyzeMeta(app, metaTags));
      }

      if (scripts) {
        promises.push(this.analyzeScripts(app, scripts));
      }

      if (cookies) {
        promises.push(this.analyzeCookies(app, cookies));
      }

      if (headers) {
        promises.push(this.analyzeHeaders(app, headers));
      }
    });

    if (js) {
      Object.keys(js).forEach((appName) => {
        // Ignore probe results for applications that are not loaded.
        if (apps[appName] && typeof js[appName] !== 'function') {
          promises.push(this.analyzeJs(apps[appName], js[appName]));
        }
      });
    }

    // Chaining (instead of `new Promise(async ...)`) makes failures reject the
    // returned promise rather than leaving it pending forever.
    return Promise.all(promises).then(() => {
      Object.keys(apps).forEach((appName) => {
        const app = apps[appName];

        if (!app.detected || !app.getConfidence()) {
          delete apps[app.name];
        }
      });

      resolveExcludes(apps, this.detected[url.canonical]);
      this.resolveImplies(apps, url.canonical);
      this.cacheDetectedApps(apps, url.canonical);
      this.trackDetectedApps(apps, url, language);

      const elapsed = ((new Date() - startTime) / 1000).toFixed(2);
      this.log(`Processing ${Object.keys(data).join(', ')} took ${elapsed}s (${url.hostname})`, 'core');

      if (Object.keys(apps).length) {
        this.log(`Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, 'core');
      }

      this.driver.displayApps(this.detected[url.canonical], { language }, context);
    });
  }

  /**
   * Queue a detected ad for the next ping.
   *
   * @param {*} ad - Ad record; stored as-is.
   */
  cacheDetectedAds(ad) {
    this.adCache.push(ad);
  }

  /**
   * Check a URL against the site's robots.txt rules as supplied by
   * `driver.getRobotsTxt`.
   *
   * @param {string} url - URL to check.
   * @returns {Promise<undefined>} Resolves when the URL may be analysed.
   *   Rejects with `undefined` when the protocol is not http(s) or the path
   *   starts with a disallowed prefix (kept as-is for existing callers), and
   *   with the underlying error when the URL cannot be parsed or the driver
   *   fails.
   */
  async robotsTxtAllows(url) {
    const parsed = this.parseUrl(url);

    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return Promise.reject();
    }

    const robotsTxt = await this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:');

    if (robotsTxt.some((disallowedPath) => parsed.pathname.startsWith(disallowedPath))) {
      return Promise.reject();
    }

    return undefined;
  }

  /**
   * Parse a URL with an anchor element created from `driver.document`.
   *
   * @param {string} url - URL to parse.
   * @returns {Object} The anchor element, extended with a `canonical`
   *   property made of protocol, host and pathname.
   */
  parseUrl(url) {
    const a = this.driver.document.createElement('a');

    a.href = url;
    a.canonical = `${a.protocol}//${a.host}${a.pathname}`;

    return a;
  }

  /**
   * Extract the `Disallow` paths that apply to user agents `*` and
   * `detector` from the text of a robots.txt file.
   *
   * @param {string} robotsTxt - Raw robots.txt content.
   * @returns {string[]} Disallowed path prefixes in file order.
   */
  static parseRobotsTxt(robotsTxt) {
    const disallow = [];
    let userAgent;

    robotsTxt.split('\n').forEach((rawLine) => {
      const line = rawLine.trim();
      const agentMatch = /^User-agent:\s*(.+)$/i.exec(line);

      if (agentMatch) {
        userAgent = agentMatch[1].toLowerCase();
        return;
      }

      if (userAgent === '*' || userAgent === 'detector') {
        const disallowMatch = /^Disallow:\s*(.+)$/i.exec(line);

        if (disallowMatch) {
          disallow.push(disallowMatch[1]);
        }
      }
    });

    return disallow;
  }

  /**
   * Flush the hostname cache (more than 100 entries) and the ad cache (more
   * than 50 entries) to `driver.ping`, resetting each cache that was sent.
   */
  ping() {
    if (Object.keys(this.hostnameCache).length > 100) {
      this.driver.ping(this.hostnameCache);
      this.hostnameCache = {};
    }

    if (this.adCache.length > 50) {
      this.driver.ping({}, this.adCache);
      this.adCache = [];
    }
  }

  /**
   * Turn pattern definitions into objects with a compiled regex.
   *
   * A pattern string has the form `regex\;attr:value\;attr:value`; the first
   * segment is compiled case-insensitively and the remaining segments become
   * attributes (e.g. `version`, `confidence`).
   *
   * @param {string|string[]|Object} patterns - One pattern, a list of
   *   patterns, or an object mapping keys to pattern(s).
   * @returns {Object[]|Object} A list of parsed patterns for string/array
   *   input (or when the object has a `main` key), otherwise an object
   *   mapping each key to its list. Falsy input yields an empty array.
   */
  parsePatterns(patterns) {
    if (!patterns) {
      return [];
    }

    const grouped = typeof patterns === 'string' || patterns instanceof Array
      ? { main: asArray(patterns) }
      : patterns;

    const parsed = {};

    Object.keys(grouped).forEach((key) => {
      parsed[key] = asArray(grouped[key]).map((pattern) => {
        const [string, ...rawAttrs] = pattern.split('\\;');
        const attrs = { string };

        try {
          attrs.regex = new RegExp(string, 'i');
        } catch (error) {
          // NOTE(review): the empty fallback regex matches every input, so a
          // broken pattern always "detects" — confirm this is intended.
          attrs.regex = new RegExp();

          this.log(`${error.message}: ${string}`, 'core', 'error');
        }

        rawAttrs.forEach((rawAttr) => {
          const [name, ...rest] = rawAttr.split(':');

          // Segments without a colon carry no value and are ignored.
          if (rest.length) {
            attrs[name] = rest.join(':');
          }
        });

        return attrs;
      });
    });

    return 'main' in parsed ? parsed.main : parsed;
  }

  /**
   * Parse the `js` patterns of every loaded application into
   * `this.jsPatterns`, keyed by application name.
   */
  parseJsPatterns() {
    Object.keys(this.apps).forEach((appName) => {
      if (this.apps[appName].js) {
        this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js);
      }
    });
  }

  /**
   * Add every application implied by an application in `apps`, repeating
   * until no new application is added, and propagate confidence values to
   * the implied applications.
   *
   * @param {Object} apps - Detected applications keyed by name; mutated.
   * @param {string} url - Canonical URL used to look up cached records.
   */
  resolveImplies(apps, url) {
    let checkImplies = true;

    const resolve = (appName) => {
      const app = apps[appName];

      if (!app || !app.props.implies) {
        return;
      }

      asArray(app.props.implies).forEach((impliedPattern) => {
        const [implied] = this.parsePatterns(impliedPattern);

        if (!implied) {
          return;
        }

        if (!this.apps[implied.string]) {
          this.log(`Implied application ${implied.string} does not exist`, 'core', 'warn');

          return;
        }

        if (!(implied.string in apps)) {
          const cached = this.detected[url] && this.detected[url][implied.string];

          apps[implied.string] = cached || new Application(implied.string, this.apps[implied.string], true);

          // A newly added application may imply further applications.
          checkImplies = true;
        }

        // Scale the implying application's confidence by the optional
        // `confidence` attribute of the implies entry (a percentage).
        const factor = implied.confidence === undefined ? 1 : implied.confidence / 100;

        Object.keys(app.confidence).forEach((id) => {
          apps[implied.string].confidence[`${id} implied by ${appName}`] = app.confidence[id] * factor;
        });
      });
    };

    while (checkImplies) {
      checkImplies = false;

      Object.keys(apps).forEach(resolve);
    }
  }

  /**
   * Store the detected applications for a URL and trigger a ping when the
   * driver supports it.
   *
   * @param {Object} apps - Detected applications keyed by name.
   * @param {string} url - Canonical URL used as cache key.
   */
  cacheDetectedApps(apps, url) {
    if (this.detected[url] === undefined) {
      this.detected[url] = {};
    }

    Object.keys(apps).forEach((appName) => {
      this.detected[url][appName] = apps[appName];
    });

    if (this.driver.ping instanceof Function) {
      this.ping();
    }
  }

  /**
   * Record applications detected with full confidence in the hostname cache
   * and trigger a ping. Does nothing when the driver has no `ping` function.
   *
   * @param {Object} apps - Detected applications keyed by name.
   * @param {Object} url - Parsed URL; `protocol`, `hostname` and `canonical`
   *   are read.
   * @param {string|null} language - Page language stored in the cache entry.
   */
  trackDetectedApps(apps, url, language) {
    if (!(this.driver.ping instanceof Function)) {
      return;
    }

    const hostname = `${url.protocol}//${url.hostname}`;
    const isTrackable = validation.hostname.test(url.hostname)
      && !validation.hostnameBlacklist.test(url.hostname);

    Object.keys(apps).forEach((appName) => {
      const app = apps[appName];
      const confidence = this.detected[url.canonical][appName].getConfidence();

      if (confidence >= 100 && isTrackable) {
        if (!(hostname in this.hostnameCache)) {
          this.hostnameCache[hostname] = {
            applications: {},
            meta: {},
          };
        }

        const { applications } = this.hostnameCache[hostname];

        if (!(appName in applications)) {
          applications[appName] = {
            hits: 0,
          };
        }

        applications[appName].hits += 1;

        if (app.version) {
          applications[appName].version = app.version;
        }
      }
    });

    if (hostname in this.hostnameCache) {
      this.hostnameCache[hostname].meta.language = language;
    }

    this.ping();
  }

  /**
   * Match the application's `url` patterns against the canonical URL.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {Object} url - Parsed URL; `canonical` is read.
   * @returns {Promise} Settles when all patterns have been tested.
   */
  analyzeUrl(app, url) {
    const patterns = this.parsePatterns(app.props.url);

    if (!patterns.length) {
      return Promise.resolve();
    }

    return asyncForEach(patterns, (pattern) => {
      if (pattern.regex.test(url.canonical)) {
        addDetected(app, pattern, 'url', url.canonical);
      }
    });
  }

  /**
   * Match the application's `html` patterns against the page markup.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {string} html - Page markup.
   * @returns {Promise} Settles when all patterns have been tested.
   */
  analyzeHtml(app, html) {
    const patterns = this.parsePatterns(app.props.html);

    if (!patterns.length) {
      return Promise.resolve();
    }

    return asyncForEach(patterns, (pattern) => {
      if (pattern.regex.test(html)) {
        addDetected(app, pattern, 'html', html);
      }
    });
  }

  /**
   * Match the application's `script` patterns against each script URI.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {string[]} scripts - Script URIs found on the page.
   * @returns {Promise} Settles when all patterns have been tested.
   */
  analyzeScripts(app, scripts) {
    const patterns = this.parsePatterns(app.props.script);

    if (!patterns.length) {
      return Promise.resolve();
    }

    return asyncForEach(patterns, (pattern) => {
      scripts.forEach((uri) => {
        if (pattern.regex.test(uri)) {
          addDetected(app, pattern, 'script', uri);
        }
      });
    });
  }

  /**
   * Match the application's `meta` patterns against the `content` attribute
   * of meta tags whose `name` or `property` equals the pattern key.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {string[]} metaTags - Raw `<meta ...>` tags.
   * @returns {Promise} Settles when all patterns have been tested.
   */
  analyzeMeta(app, metaTags) {
    if (!app.props.meta) {
      return Promise.resolve();
    }

    const patterns = this.parsePatterns(app.props.meta);
    const promises = [];

    metaTags.forEach((tag) => {
      Object.keys(patterns).forEach((meta) => {
        const nameRegex = new RegExp(`(?:name|property)=["']${meta}["']`, 'i');

        if (!nameRegex.test(tag)) {
          return;
        }

        const content = tag.match(/content=("|')([^"']+)("|')/i);

        // Without a content attribute there is nothing to match against.
        if (!content) {
          return;
        }

        promises.push(asyncForEach(patterns[meta], (pattern) => {
          if (pattern.regex.test(content[2])) {
            addDetected(app, pattern, 'meta', content[2], meta);
          }
        }));
      });
    });

    return Promise.all(promises);
  }

  /**
   * Match the application's `headers` patterns against response headers.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {Object} headers - Maps header names to lists of values; looked
   *   up by the lower-cased pattern key.
   * @returns {Promise} Settles when all patterns have been tested.
   */
  analyzeHeaders(app, headers) {
    const patterns = this.parsePatterns(app.props.headers);
    const promises = [];

    Object.keys(patterns).forEach((patternKey) => {
      if (typeof patterns[patternKey] === 'function') {
        return;
      }

      const headerName = patternKey.toLowerCase();

      promises.push(asyncForEach(patterns[patternKey], (pattern) => {
        if (headerName in headers) {
          headers[headerName].forEach((headerValue) => {
            if (pattern.regex.test(headerValue)) {
              addDetected(app, pattern, 'headers', headerValue, headerName);
            }
          });
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Match the application's `cookies` patterns against cookie values; cookie
   * names are compared case-insensitively.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {Object[]} cookies - Cookies with `name` and `value`.
   * @returns {Promise} Settles when all patterns have been tested.
   */
  analyzeCookies(app, cookies) {
    const patterns = this.parsePatterns(app.props.cookies);
    const promises = [];

    Object.keys(patterns).forEach((cookieName) => {
      if (typeof patterns[cookieName] === 'function') {
        return;
      }

      const cookieNameLower = cookieName.toLowerCase();

      promises.push(asyncForEach(patterns[cookieName], (pattern) => {
        const cookie = cookies.find((candidate) => candidate.name.toLowerCase() === cookieNameLower);

        if (cookie && pattern.regex.test(cookie.value)) {
          addDetected(app, pattern, 'cookies', cookie.value, cookieName);
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Match JavaScript probe results against the patterns prepared by
   * `parseJsPatterns`.
   *
   * @param {Application} app - Application to test; mutated on a match.
   * @param {Object} results - Maps a probed expression to an object of
   *   values keyed by pattern index.
   * @returns {Promise} Settles when all results have been tested.
   */
  analyzeJs(app, results) {
    const promises = [];

    Object.keys(results).forEach((string) => {
      if (typeof results[string] === 'function') {
        return;
      }

      promises.push(asyncForEach(Object.keys(results[string]), (index) => {
        // Results without a matching parsed pattern are skipped, not fatal.
        const appPatterns = this.jsPatterns[app.name];
        const pattern = appPatterns && appPatterns[string] && appPatterns[string][index];
        const value = results[string][index];

        if (pattern && pattern.regex.test(value)) {
          addDetected(app, pattern, 'js', value, string);
        }
      }));
    });

    return Promise.all(promises);
  }
}
|
// Export the class when a CommonJS-style `module` object exists; otherwise
// `Detector` is left as a plain top-level declaration.
if (typeof module === 'object') {
  module.exports = Detector;
}