// Retrieved from the UNPKG file viewer (JavaScript, 17.6 kB).
/**
 * Hostname checks used before a host is recorded in the hostname cache.
 *
 * - `hostname` matches names that end in a dot followed by a 2-6 letter label,
 *   optionally preceded by a 2-3 letter label (for example `co.uk`).
 * - `hostnameBlacklist` matches hosts containing labels such as `local.`,
 *   `dev.`, `staging.`, `test.`, `demo.`, `admin.`, `google.` or `cache.`,
 *   as well as `/admin` and `.local`.
 */
const validation = {
  hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/,
};
/**
 * Return `value` unchanged when it is already an array, otherwise wrap it in
 * a single-element array.
 *
 * `Array.isArray` is used instead of `instanceof Array` so that arrays created
 * in another realm (an iframe, a vm context) are recognised and not wrapped a
 * second time.
 *
 * @param {*} value - Any value.
 * @returns {Array} `value` itself if it is an array, else `[ value ]`.
 */
function asArray(value) {
  return Array.isArray(value) ? value : [value];
}
/**
 * Call `iterator` once per item, each call scheduled on its own 1 ms timer,
 * and collect the results.
 *
 * A throwing iterator rejects the returned promise. Without the try/catch the
 * exception would escape the timer callback uncaught and the promise would
 * never settle.
 *
 * @param {Array} [iterable] - Items to process; a falsy value is treated as an empty list.
 * @param {Function} iterator - Called with each item.
 * @returns {Promise<Array>} Resolves with the iterator results in input order.
 */
function asyncForEach(iterable, iterator) {
  const tasks = (iterable || []).map(item => new Promise((resolve, reject) => {
    setTimeout(() => {
      try {
        resolve(iterator(item));
      } catch (error) {
        reject(error);
      }
    }, 1);
  }));

  return Promise.all(tasks);
}
/**
 * Mark an application as detected, record the confidence contributed by the
 * matching pattern and, when the pattern has a version template, derive the
 * version from the match.
 *
 * The version template may contain back-references (`\1`, `\2`, ...) and
 * ternaries of the form `\1?yes:no`, which are resolved against the capture
 * groups of `pattern.regex` applied to `value`.
 *
 * @param {Object} app - Application record; `detected`, `confidence` and `version` are written.
 * @param {Object} pattern - Parsed pattern with `regex` and optional `confidence` / `version`.
 * @param {string} type - Kind of evidence (e.g. `'html'`, `'headers'`); part of the confidence key.
 * @param {string} value - The text the pattern matched against.
 * @param {string} [key] - Optional sub-key (header, cookie or meta name); part of the confidence key.
 */
function addDetected(app, pattern, type, value, key) {
  app.detected = true;

  // Each piece of evidence is stored under its own key
  const confidenceId = `${type} ${key ? `${key} ` : ''}${pattern.regex}`;

  app.confidence[confidenceId] = pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10);

  if (!pattern.version) {
    return;
  }

  const matches = pattern.regex.exec(value);

  if (!matches) {
    return;
  }

  let { version } = pattern;

  matches.forEach((match, i) => {
    // Resolve a ternary such as "\1?a:b" depending on whether group i matched
    const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version);

    if (ternary && ternary.length === 3) {
      version = version.replace(ternary[0], () => (match ? ternary[1] : ternary[2]));
    }

    // Substitute back-references. A replacer function is used so that "$&",
    // "$1" and similar sequences in the matched text are inserted literally
    // rather than being interpreted as replacement patterns.
    version = version.trim().replace(new RegExp(`\\\\${i}`, 'g'), () => match || '');
  });

  if (version) {
    app.version = version;
  }
}
61
/**
 * Delete from `apps` every application named in the `excludes` list of any
 * application found in `apps` or `detected`.
 *
 * @param {Object} apps - Applications keyed by name; modified in place.
 * @param {Object} [detected] - Additional applications keyed by name whose `excludes` also apply.
 */
function resolveExcludes(apps, detected) {
  const candidates = Object.assign({}, apps, detected);
  const excludedNames = [];

  // Gather the exclusion lists of every candidate application
  for (const name of Object.keys(candidates)) {
    const { excludes } = candidates[name].props;

    if (excludes) {
      excludedNames.push(...asArray(excludes));
    }
  }

  // Drop the applications that were named in any exclusion list
  for (const name of Object.keys(apps)) {
    if (excludedNames.includes(name)) {
      delete apps[name];
    }
  }
}
/**
 * Detection state for a single application.
 */
class Application {
  /**
   * @param {string} name - Application name.
   * @param {Object} props - Application definition (patterns, implies, excludes, ...).
   * @param {*} [detected] - Coerced to a boolean; whether the application starts out detected.
   */
  constructor(name, props, detected) {
    Object.assign(this, {
      confidence: {},
      confidenceTotal: 0,
      detected: !!detected,
      excludes: [],
      name,
      props,
      version: '',
    });
  }

  /**
   * Sum all recorded confidence values, cap the sum at 100, store it in
   * `confidenceTotal` and return it.
   *
   * @returns {number} The capped confidence total.
   */
  getConfidence() {
    const sum = Object.values(this.confidence)
      .reduce((running, value) => running + value, 0);

    this.confidenceTotal = Math.min(sum, 100);

    return this.confidenceTotal;
  }
}
/**
 * Matches page data (URL, HTML, meta tags, scripts, cookies, headers and
 * JavaScript variables) against the application definitions in `this.apps`
 * and reports the detected applications through `this.driver`.
 *
 * The driver is supplied by the host environment. This class calls
 * `driver.displayApps` and `driver.getRobotsTxt`, reads `driver.document`,
 * and uses `driver.log` and `driver.ping` when they are present.
 */
class Detector {
  constructor() {
    this.apps = {};
    this.categories = {};
    this.driver = {};
    this.jsPatterns = {};
    this.detected = {};
    this.hostnameCache = {};
    this.adCache = [];
    this.config = {
      websiteURL: '',
      twitterURL: '',
      githubURL: '',
    };
  }

  /**
   * Forward a message to the driver's logger, if the driver has one.
   *
   * @param {string} message - Text to log.
   * @param {string} [source] - Origin of the message; defaults to `''`.
   * @param {string} [type] - Severity; defaults to `'debug'`.
   */
  log(message, source, type) {
    if (this.driver.log) {
      this.driver.log(message, source || '', type || 'debug');
    }
  }

  /**
   * Analyze the data collected for a page and hand the detected applications
   * to `driver.displayApps`.
   *
   * @param {Object} url - Parsed URL exposing `canonical`, `hostname` and `protocol`.
   * @param {Object} data - Page data; any of `html`, `scripts`, `cookies`, `headers`, `js`.
   * @param {*} [context] - Passed through unchanged to `driver.displayApps`.
   * @returns {Promise<void>} Resolves once the results have been displayed and
   *   rejects if any analysis step fails.
   */
  analyze(url, data, context) {
    const apps = {};
    const promises = [];
    const startTime = new Date();
    const {
      scripts,
      cookies,
      headers,
      js,
    } = data;
    // Only string HTML can be pattern-matched; anything else is ignored
    const html = typeof data.html === 'string' ? data.html : '';

    if (this.detected[url.canonical] === undefined) {
      this.detected[url.canonical] = {};
    }

    const cached = this.detected[url.canonical];

    // Additional information
    let language = null;
    let metaTags = [];

    if (html) {
      const langMatch = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

      language = langMatch ? langMatch[1] : null;
      metaTags = html.match(/<meta[^>]+>/ig) || [];
    }

    Object.keys(this.apps).forEach((appName) => {
      // Reuse the record cached for this URL so confidence accumulates across calls
      const app = cached[appName] || new Application(appName, this.apps[appName]);

      apps[appName] = app;
      promises.push(this.analyzeUrl(app, url));

      if (html) {
        promises.push(this.analyzeHtml(app, html));
        promises.push(this.analyzeMeta(app, metaTags));
      }

      if (scripts) {
        promises.push(this.analyzeScripts(app, scripts));
      }

      if (cookies) {
        promises.push(this.analyzeCookies(app, cookies));
      }

      if (headers) {
        promises.push(this.analyzeHeaders(app, headers));
      }
    });

    if (js) {
      Object.keys(js).forEach((appName) => {
        // Skip results for names that are not in this.apps; there is no record to update
        if (apps[appName] && typeof js[appName] !== 'function') {
          promises.push(this.analyzeJs(apps[appName], js[appName]));
        }
      });
    }

    // Chained directly (no `new Promise(async ...)` wrapper) so that a failure
    // in any step rejects the returned promise instead of leaving it pending.
    return Promise.all(promises).then(() => {
      Object.keys(apps).forEach((appName) => {
        const app = apps[appName];

        if (!app.detected || !app.getConfidence()) {
          delete apps[app.name];
        }
      });

      // The per-URL cache is keyed by the canonical URL everywhere else
      resolveExcludes(apps, this.detected[url.canonical]);
      this.resolveImplies(apps, url.canonical);
      this.cacheDetectedApps(apps, url.canonical);
      this.trackDetectedApps(apps, url, language);

      this.log(`Processing ${Object.keys(data).join(', ')} took ${((new Date() - startTime) / 1000).toFixed(2)}s (${url.hostname})`, 'core');

      if (Object.keys(apps).length) {
        this.log(`Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, 'core');
      }

      this.driver.displayApps(this.detected[url.canonical], { language }, context);
    });
  }

  /**
   * Cache detected ads
   *
   * @param {*} ad - Entry appended to `adCache`.
   */
  cacheDetectedAds(ad) {
    this.adCache.push(ad);
  }

  /**
   * Check a URL against the robots.txt rules returned by the driver.
   *
   * @param {string} url - URL to check.
   * @returns {Promise<void>} Resolves when the URL may be analyzed. Rejects
   *   without a reason when the protocol is not http(s) or the path starts
   *   with a disallowed path, and rejects with the underlying error when the
   *   URL cannot be parsed or robots.txt cannot be fetched.
   */
  async robotsTxtAllows(url) {
    const parsed = this.parseUrl(url);

    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      // Rejected without a reason, as callers may rely on the reason being undefined
      return Promise.reject();
    }

    const robotsTxt = await this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:');

    if (robotsTxt.some(disallowedPath => parsed.pathname.indexOf(disallowedPath) === 0)) {
      return Promise.reject();
    }

    return undefined;
  }

  /**
   * Parse a URL using an anchor element created from `driver.document`.
   *
   * @param {string} url - URL to parse.
   * @returns {Object} The anchor element, with an added `canonical` property
   *   made of protocol, host and pathname.
   */
  parseUrl(url) {
    const a = this.driver.document.createElement('a');

    a.href = url;
    a.canonical = `${a.protocol}//${a.host}${a.pathname}`;

    return a;
  }

  /**
   * Extract the `Disallow` paths that apply to the `*` and `detector` user
   * agents from the text of a robots.txt file.
   *
   * @param {string} robotsTxt - Contents of robots.txt.
   * @returns {string[]} Disallowed paths in file order.
   */
  static parseRobotsTxt(robotsTxt) {
    const disallow = [];
    let userAgent;

    robotsTxt.split('\n').forEach((rawLine) => {
      const line = rawLine.trim();
      const agentMatch = /^User-agent:\s*(.+)$/i.exec(line);

      if (agentMatch) {
        userAgent = agentMatch[1].toLowerCase();

        return;
      }

      if (userAgent === '*' || userAgent === 'detector') {
        const disallowMatch = /^Disallow:\s*(.+)$/i.exec(line);

        if (disallowMatch) {
          disallow.push(disallowMatch[1]);
        }
      }
    });

    return disallow;
  }

  /**
   * Send the hostname cache (once it holds more than 100 hosts) and the ad
   * cache (once it holds more than 50 ads) to `driver.ping`, clearing each
   * cache after it has been sent.
   */
  ping() {
    if (Object.keys(this.hostnameCache).length > 100) {
      this.driver.ping(this.hostnameCache);
      this.hostnameCache = {};
    }

    if (this.adCache.length > 50) {
      this.driver.ping({}, this.adCache);
      this.adCache = [];
    }
  }

  /**
   * Parse apps.json patterns.
   *
   * A pattern is a regular expression source optionally followed by
   * `\;key:value` attributes (for example `\;version:\1`).
   *
   * @param {string|string[]|Object} patterns - One pattern, a list of patterns,
   *   or an object mapping names to a pattern or list of patterns.
   * @returns {Array|Object} For a string or array input, an array of parsed
   *   patterns; for an object input, an object with the same keys mapping to
   *   arrays of parsed patterns; `[]` for a falsy input. Each parsed pattern
   *   has `string`, `regex` and any attributes found.
   */
  parsePatterns(patterns) {
    if (!patterns) {
      return [];
    }

    // Normalize a string or array to an object containing an array
    const source = typeof patterns === 'string' || Array.isArray(patterns)
      ? { main: asArray(patterns) }
      : patterns;
    let parsed = {};

    Object.keys(source).forEach((key) => {
      parsed[key] = asArray(source[key]).map((pattern) => {
        const [string, ...attributes] = pattern.split('\\;');
        const attrs = { string };

        try {
          // The RegExp constructor needs no escaping of forward slashes
          attrs.regex = new RegExp(string, 'i');
        } catch (error) {
          // NOTE(review): the empty fallback expression matches any input — confirm this is intended
          attrs.regex = new RegExp();

          this.log(`${error.message}: ${string}`, 'core', 'error');
        }

        // Key value pairs
        attributes.forEach((attribute) => {
          const parts = attribute.split(':');

          if (parts.length > 1) {
            attrs[parts.shift()] = parts.join(':');
          }
        });

        return attrs;
      });
    });

    // Convert back to array if the original pattern list was an array (or string)
    if ('main' in parsed) {
      parsed = parsed.main;
    }

    return parsed;
  }

  /**
   * Parse JavaScript patterns of every application that defines `js` and
   * store the result in `jsPatterns`, keyed by application name.
   */
  parseJsPatterns() {
    Object.keys(this.apps).forEach((appName) => {
      if (this.apps[appName].js) {
        this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js);
      }
    });
  }

  /**
   * Add the applications implied by the detected ones to `apps`, copying the
   * implying application's confidence (scaled by the implied pattern's
   * `confidence` attribute, if any) onto the implied application.
   *
   * @param {Object} apps - Detected applications keyed by name; modified in place.
   * @param {string} url - Key into the per-URL detection cache.
   */
  resolveImplies(apps, url) {
    let checkImplies = true;

    const resolve = (appName) => {
      const app = apps[appName];

      if (!app || !app.props.implies) {
        return;
      }

      asArray(app.props.implies).forEach((impliedPattern) => {
        const [implied] = this.parsePatterns(impliedPattern);

        // An empty entry yields no parsed pattern
        if (!implied) {
          return;
        }

        const impliedName = implied.string;

        if (!this.apps[impliedName]) {
          this.log(`Implied application ${impliedName} does not exist`, 'core', 'warn');

          return;
        }

        if (!(impliedName in apps)) {
          const cachedApp = this.detected[url] && this.detected[url][impliedName];

          apps[impliedName] = cachedApp || new Application(impliedName, this.apps[impliedName], true);

          checkImplies = true;
        }

        // Apply app confidence to implied app
        const factor = implied.confidence === undefined ? 1 : implied.confidence / 100;

        Object.keys(app.confidence).forEach((id) => {
          apps[impliedName].confidence[`${id} implied by ${appName}`] = app.confidence[id] * factor;
        });
      });
    };

    // Run several passes as implied apps may imply other apps
    while (checkImplies) {
      checkImplies = false;

      Object.keys(apps).forEach(resolve);
    }
  }

  /**
   * Cache detected applications per URL, then ping if the driver supports it.
   *
   * @param {Object} apps - Detected applications keyed by name.
   * @param {string} url - Cache key (the canonical URL).
   */
  cacheDetectedApps(apps, url) {
    if (this.detected[url] === undefined) {
      this.detected[url] = {};
    }

    Object.keys(apps).forEach((appName) => {
      this.detected[url][appName] = apps[appName];
    });

    if (typeof this.driver.ping === 'function') {
      this.ping();
    }
  }

  /**
   * Record hits for applications detected with a confidence of at least 100
   * in the hostname cache, then ping. Does nothing when the driver has no
   * `ping` function.
   *
   * @param {Object} apps - Detected applications keyed by name.
   * @param {Object} url - Parsed URL exposing `canonical`, `hostname` and `protocol`.
   * @param {?string} language - Page language stored in the host's `meta`.
   */
  trackDetectedApps(apps, url, language) {
    if (typeof this.driver.ping !== 'function') {
      return;
    }

    const hostname = `${url.protocol}//${url.hostname}`;

    Object.keys(apps).forEach((appName) => {
      const app = apps[appName];

      if (!(this.detected[url.canonical][appName].getConfidence() >= 100)) {
        return;
      }

      if (!validation.hostname.test(url.hostname) || validation.hostnameBlacklist.test(url.hostname)) {
        return;
      }

      if (!(hostname in this.hostnameCache)) {
        this.hostnameCache[hostname] = {
          applications: {},
          meta: {},
        };
      }

      const { applications } = this.hostnameCache[hostname];

      if (!(appName in applications)) {
        applications[appName] = {
          hits: 0,
        };
      }

      applications[appName].hits += 1;

      if (app.version) {
        applications[appName].version = app.version;
      }
    });

    if (hostname in this.hostnameCache) {
      this.hostnameCache[hostname].meta.language = language;
    }

    this.ping();
  }

  /**
   * Analyze URL
   *
   * @param {Object} app - Application record to update.
   * @param {Object} url - Parsed URL; `canonical` is matched.
   * @returns {Promise} Settles when all URL patterns have been tested.
   */
  analyzeUrl(app, url) {
    const patterns = this.parsePatterns(app.props.url);

    if (!patterns.length) {
      return Promise.resolve();
    }

    return asyncForEach(patterns, (pattern) => {
      if (pattern.regex.test(url.canonical)) {
        addDetected(app, pattern, 'url', url.canonical);
      }
    });
  }

  /**
   * Analyze HTML
   *
   * @param {Object} app - Application record to update.
   * @param {string} html - Page markup.
   * @returns {Promise} Settles when all HTML patterns have been tested.
   */
  analyzeHtml(app, html) {
    const patterns = this.parsePatterns(app.props.html);

    if (!patterns.length) {
      return Promise.resolve();
    }

    return asyncForEach(patterns, (pattern) => {
      if (pattern.regex.test(html)) {
        addDetected(app, pattern, 'html', html);
      }
    });
  }

  /**
   * Analyze script tag
   *
   * @param {Object} app - Application record to update.
   * @param {string[]} scripts - Script URIs.
   * @returns {Promise} Settles when all script patterns have been tested.
   */
  analyzeScripts(app, scripts) {
    const patterns = this.parsePatterns(app.props.script);

    if (!patterns.length) {
      return Promise.resolve();
    }

    return asyncForEach(patterns, (pattern) => {
      scripts.forEach((uri) => {
        if (pattern.regex.test(uri)) {
          addDetected(app, pattern, 'script', uri);
        }
      });
    });
  }

  /**
   * Analyze meta tag
   *
   * @param {Object} app - Application record to update.
   * @param {string[]} metaTags - Raw `<meta ...>` tags.
   * @returns {Promise} Settles when all meta patterns have been tested.
   */
  analyzeMeta(app, metaTags) {
    if (!app.props.meta) {
      return Promise.resolve();
    }

    const patterns = this.parsePatterns(app.props.meta);
    const promises = [];

    // Build the name matchers once rather than once per tag
    const matchers = Object.keys(patterns).map(meta => ({
      meta,
      nameRegex: new RegExp(`(?:name|property)=["']${meta}["']`, 'i'),
    }));

    metaTags.forEach((tag) => {
      matchers.forEach(({ meta, nameRegex }) => {
        if (!nameRegex.test(tag)) {
          return;
        }

        const content = tag.match(/content=("|')([^"']+)("|')/i);

        promises.push(asyncForEach(patterns[meta], (pattern) => {
          if (content && content.length === 4 && pattern.regex.test(content[2])) {
            addDetected(app, pattern, 'meta', content[2], meta);
          }
        }));
      });
    });

    return Promise.all(promises);
  }

  /**
   * Analyze response headers
   *
   * @param {Object} app - Application record to update.
   * @param {Object} headers - Header values keyed by lower-case header name.
   * @returns {Promise} Settles when all header patterns have been tested.
   */
  analyzeHeaders(app, headers) {
    const patterns = this.parsePatterns(app.props.headers);
    const promises = [];

    Object.keys(patterns).forEach((name) => {
      if (typeof patterns[name] === 'function') {
        return;
      }

      const headerName = name.toLowerCase();

      promises.push(asyncForEach(patterns[name], (pattern) => {
        if (!(headerName in headers)) {
          return;
        }

        // Accept a single value as well as a list of values
        asArray(headers[headerName]).forEach((headerValue) => {
          if (pattern.regex.test(headerValue)) {
            addDetected(app, pattern, 'headers', headerValue, headerName);
          }
        });
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Analyze cookies
   *
   * @param {Object} app - Application record to update.
   * @param {Object[]} cookies - Cookies, each with `name` and `value`.
   * @returns {Promise} Settles when all cookie patterns have been tested.
   */
  analyzeCookies(app, cookies) {
    const patterns = this.parsePatterns(app.props.cookies);
    const promises = [];

    Object.keys(patterns).forEach((cookieName) => {
      if (typeof patterns[cookieName] === 'function') {
        return;
      }

      const cookieNameLower = cookieName.toLowerCase();

      promises.push(asyncForEach(patterns[cookieName], (pattern) => {
        const cookie = cookies.find(candidate => candidate.name.toLowerCase() === cookieNameLower);

        if (cookie && pattern.regex.test(cookie.value)) {
          addDetected(app, pattern, 'cookies', cookie.value, cookieName);
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Analyze JavaScript variables
   *
   * @param {Object} app - Application record to update.
   * @param {Object} results - Values keyed by variable name, then by pattern index.
   * @returns {Promise} Settles when all values have been tested.
   */
  analyzeJs(app, results) {
    // Patterns may be missing if parseJsPatterns has not run for this application
    const appPatterns = this.jsPatterns[app.name] || {};
    const promises = [];

    Object.keys(results || {}).forEach((string) => {
      if (typeof results[string] === 'function') {
        return;
      }

      promises.push(asyncForEach(Object.keys(results[string] || {}), (index) => {
        const pattern = appPatterns[string] && appPatterns[string][index];
        const value = results[string][index];

        if (pattern && pattern.regex.test(value)) {
          addDetected(app, pattern, 'js', value, string);
        }
      }));
    });

    return Promise.all(promises);
  }
}
// Export the class for CommonJS consumers; when no `module` object exists
// nothing is exported.
if (typeof module === 'object') {
  module.exports = Detector;
}
// (The original file has no trailing newline.)