From 7877da87c1b8f3558cc3597e7dba89a80c44e15a Mon Sep 17 00:00:00 2001 From: Elbert Alias Date: Sat, 1 Dec 2012 17:05:29 +1100 Subject: [PATCH] Added confidence level --- drivers/bookmarklet/js/wappalyzer.js | 101 ++++++++++++++-------- drivers/bookmarklet/json | 7 +- drivers/chrome/apps.json | 7 +- drivers/chrome/js/driver.js | 8 +- drivers/chrome/js/popup.js | 10 ++- drivers/chrome/js/wappalyzer.js | 101 ++++++++++++++-------- drivers/firefox-jetpack/data/apps.json | 7 +- drivers/firefox-jetpack/lib/wappalyzer.js | 101 ++++++++++++++-------- drivers/firefox/content/apps.json | 7 +- drivers/firefox/content/js/driver.js | 14 +-- drivers/firefox/content/js/wappalyzer.js | 101 ++++++++++++++-------- drivers/html/apps.json | 7 +- drivers/html/js/wappalyzer.js | 101 ++++++++++++++-------- drivers/php/Wappalyzer.php | 4 +- drivers/php/apps.json | 7 +- drivers/php/index.php | 4 +- drivers/php/js/driver.js | 17 ++-- drivers/php/js/wappalyzer.js | 101 ++++++++++++++-------- share/apps.json | 7 +- share/js/wappalyzer.js | 101 ++++++++++++++-------- 20 files changed, 533 insertions(+), 280 deletions(-) diff --git a/drivers/bookmarklet/js/wappalyzer.js b/drivers/bookmarklet/js/wappalyzer.js index b3f81db41..d87c376e2 100644 --- a/drivers/bookmarklet/js/wappalyzer.js +++ b/drivers/bookmarklet/js/wappalyzer.js @@ -30,8 +30,8 @@ var wappalyzer = (function() { var w = { apps: null, cats: null, - ping: {}, - detected: [], + ping: { hostnames: {} }, + detected: {}, config: { environment: 'dev', // dev | live @@ -103,11 +103,11 @@ var wappalyzer = (function() { } if ( typeof w.detected[url] === 'undefined' ) { - w.detected[url] = []; + w.detected[url] = {}; } var - i, app, type, regex, regexMeta, regexScript, match, content, meta, header, + i, app, confidence, type, regex, regexMeta, regexScript, match, content, meta, header, profiler = { regexCount: 0, startTime: new Date().getTime() @@ -118,11 +118,15 @@ var wappalyzer = (function() { appLoop: for ( app in w.apps ) { // Skip if the app has already been detected - if ( w.detected[url].indexOf(app) !== -1 || apps.indexOf(app) !== -1 ) { + if ( w.detected[url].hasOwnProperty(app) || apps.indexOf(app) !== -1 ) { continue; } for ( type in w.apps[app] ) { + confidence = {}; + + confidence[type] = w.apps[app].hasOwnProperty('confidence') && w.apps[app].confidence.hasOwnProperty(type) ? w.apps[app].confidence[type] : 100; + switch ( type ) { case 'url': regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); @@ -130,7 +134,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(url) ) { - apps.push(app); + apps.push({ app: app, confidence: confidence }); continue appLoop; } @@ -146,7 +150,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(data[type]) ) { - apps.push(app); + apps.push({ app: app, confidence: confidence }); continue appLoop; } @@ -166,7 +170,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(match[2]) ) { - apps.push(app); + apps.push({ app: app, confidence: confidence }); continue appLoop; } @@ -194,7 +198,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( content && content.length === 4 && regex.test(content[2]) ) { - apps.push(app); + apps.push({ app: app, confidence: confidence }); continue appLoop; } @@ -214,7 +218,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( typeof data[type][header] === 'string' && regex.test(data[type][header]) ) { - apps.push(app); + apps.push({ app: app, confidence: confidence }); continue appLoop; } @@ -232,7 +236,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(data[type][i]) ) { - apps.push(app); + apps.push({ app: app, confidence: confidence }); continue appLoop; } @@ -250,9 +254,11 @@ var wappalyzer = (function() { for ( i = 0; i < 3; i ++ ) { for ( j in apps ) { - if ( w.apps[apps[j]] && w.apps[apps[j]].implies ) { - for ( k in w.apps[apps[j]].implies ) { - implied = w.apps[apps[j]].implies[k]; + app = apps[j].app; + + if ( w.apps[app] && w.apps[app].implies ) { + for ( k in w.apps[app].implies ) { + implied = w.apps[app].implies[k]; if ( !w.apps[implied] ) { w.log('Implied application ' + implied + ' does not exist'); @@ -260,45 +266,72 @@ var wappalyzer = (function() { continue; } - if ( w.detected[url].indexOf(implied) === -1 && apps.indexOf(implied) === -1 ) { - apps.push(implied); + if ( !w.detected[url].hasOwnProperty(implied) && apps.indexOf(implied) === -1 ) { + apps.push({ app: implied, confidence: apps[j].confidence }); } } } } } - w.log(apps.length + ' apps detected: ' + apps.join(', ') + ' on ' + url); + w.log(apps.reduce(function(reduced, app) { + var i; + + for ( i in app.confidence ) { + return app.app + ' (' + app.confidence[i] + '%) '; + } + }, Object.keys(apps).length + ' apps detected: ') + 'on ' + url); // Keep history of detected apps var i, app, regex, regexMeta, match; for ( i in apps ) { - app = apps[i]; + app = apps[i].app; - // Per hostname - if ( /^[a-z0-9._\-]+\.[a-z]+/.test(hostname) && !/((local|dev|development|stage|staging|test|testing|demo|admin)\.|\/admin|\.local)/.test(url) ) { - if ( typeof w.ping.hostnames === 'undefined' ) { - w.ping.hostnames = {}; - } + confidence = apps[i].confidence; - if ( typeof w.ping.hostnames[hostname] === 'undefined' ) { - w.ping.hostnames[hostname] = { applications: {}, meta: {} }; - } + // Per URL + if ( !w.detected[url].hasOwnProperty(app)) { + w.detected[url][app] = {}; + } - if ( typeof w.ping.hostnames[hostname].applications[app] === 'undefined' ) { - w.ping.hostnames[hostname].applications[app] = 1; - } + for ( type in confidence ) { + w.detected[url][app][type] = confidence[type]; + } - w.ping.hostnames[hostname].applications[app] ++; + // Calculate confidence total + w.detected[url][app].total = 0; + + for ( type in w.detected[url][app] ) { + if ( type !== 'total' ) { + w.detected[url][app].total += w.detected[url][app][type]; + + w.detected[url][app].total = Math.min(w.detected[url][app].total, 100); + } } - // Per URL - if ( w.detected[url].indexOf(app) === -1 ) { w.detected[url].push(app); } + if ( w.detected[url][app].total >= 100 ) { + // Per hostname + if ( /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/.test(hostname) && !/((local|dev(elopment)?|stag(e|staging)?|test(ing)?|demo(shop)?|admin)\.|\/admin|\.local)/.test(url) ) { + if ( !w.ping.hostnames.hasOwnProperty(hostname) ) { + w.ping.hostnames[hostname] = { applications: {}, meta: {} }; + } + + if ( !w.ping.hostnames[hostname].applications.hasOwnProperty(app) ) { + w.ping.hostnames[hostname].applications[app] = 1; + } + + w.ping.hostnames[hostname].applications[app] ++; + } else { + w.log('Ignoring hostname "' + hostname + '"'); + } + } } + w.log(JSON.stringify(w.detected)); + // Additional information - if ( typeof w.ping.hostnames !== 'undefined' && typeof w.ping.hostnames[hostname] !== 'undefined' ) { + if ( w.ping.hostnames.hasOwnProperty(hostname) ) { if ( typeof data.html === 'string' && data.html ) { match = data.html.match(/]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i); @@ -322,7 +355,7 @@ var wappalyzer = (function() { w.log(hostname + ': ' + JSON.stringify(w.ping.hostnames[hostname])); } - if ( typeof w.ping.hostnames === 'object' && Object.keys(w.ping.hostnames).length >= 50 ) { driver('ping'); } + if ( Object.keys(w.ping.hostnames).length >= 50 ) { driver('ping'); } apps = null; data = null; diff --git a/drivers/bookmarklet/json b/drivers/bookmarklet/json index 894ffb09c..c59edee7b 100644 --- a/drivers/bookmarklet/json +++ b/drivers/bookmarklet/json @@ -1380,8 +1380,9 @@ "cats": [ 18 ], "script": "/assets/application-[a-z\\d]{32}/\\.js", "meta": { "csrf-param": "authenticity_token" }, - "headers": { "Server": "(mod_rails|mod_rack|Phusion\\.Passenger)", "X-Powered-By": "(mod_rails|mod_rack|Phusion\\.Passenger)" }, - "implies": [ "Ruby" ] + "headers": { "Server": "(mod_rails|mod_rack|Phusion(\\.|_)Passenger)", "X-Powered-By": "(mod_rails|mod_rack|Phusion(\\.|_)Passenger)" }, + "implies": [ "Ruby" ], + "confidence": { "script": 50, "meta": 50, "headers": 50 } }, "S.Builder": { "cats": [ 1 ], @@ -1581,7 +1582,7 @@ "Tumblr": { "cats": [ 11 ], "html": "