From 24b4bed5f3dca80f974ec6ca33d6c37e320a365a Mon Sep 17 00:00:00 2001 From: Elbert Alias Date: Sun, 14 Oct 2012 13:48:52 +1100 Subject: [PATCH] All driver now using apps.json, more performance improvements --- drivers/bookmarklet/js/wappalyzer.js | 48 ++++++++++++----------- drivers/bookmarklet/json | 2 +- drivers/chrome/apps.json | 2 +- drivers/chrome/js/driver.js | 9 +++-- drivers/chrome/js/popup.js | 6 +-- drivers/chrome/js/wappalyzer.js | 48 ++++++++++++----------- drivers/firefox-jetpack/data/apps.json | 2 +- drivers/firefox-jetpack/lib/wappalyzer.js | 48 ++++++++++++----------- drivers/firefox/content/apps.json | 2 +- drivers/firefox/content/js/content.js | 32 ++++++++------- drivers/firefox/content/js/wappalyzer.js | 48 ++++++++++++----------- drivers/html/apps.json | 2 +- drivers/html/js/driver.js | 16 ++++++++ drivers/html/js/wappalyzer.js | 48 ++++++++++++----------- drivers/php/Wappalyzer.php | 13 +++++- drivers/php/apps.json | 2 +- drivers/php/js/wappalyzer.js | 48 ++++++++++++----------- share/apps.json | 2 +- share/js/wappalyzer.js | 48 ++++++++++++----------- 19 files changed, 236 insertions(+), 190 deletions(-) diff --git a/drivers/bookmarklet/js/wappalyzer.js b/drivers/bookmarklet/js/wappalyzer.js index edcd0192e..2b6db1236 100644 --- a/drivers/bookmarklet/js/wappalyzer.js +++ b/drivers/bookmarklet/js/wappalyzer.js @@ -105,7 +105,7 @@ var wappalyzer = wappalyzer || (function() { } var - i, app, type, regex, match, content, meta, header, + i, app, type, regex, regexMeta, regexScript, match, content, meta, header, profiler = { regexCount: 0, startTime: ( new Date ).getTime() @@ -113,86 +113,88 @@ var wappalyzer = wappalyzer || (function() { apps = [] ; + appLoop: for ( app in w.apps ) { // Skip if the app has already been detected if ( w.detected[url].indexOf(app) !== -1 || apps.indexOf(app) !== -1 ) { continue; } - next: - for ( type in w.apps[app] ) { - if ( data[type] == null ) { - continue; - } - switch ( type ) { case 'url': - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(url) ) { apps.push(app); - break next; + continue appLoop; } break; case 'html': - regex = new RegExp(w.apps[app][type], 'i'); + if ( data[type] == null ) { + break; + } + + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(data[type]) ) { apps.push(app); - break next; + continue appLoop; } break; case 'script': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); + regexScript = new RegExp(']+src=("|\')([^"\']+)\1', 'ig'); profiler.regexCount ++; - while ( match = new RegExp(']+src=("|\')([^"\']+)\1', 'ig').exec(data['html']) ) { + while ( match = regexScript.exec(data.html) ) { profiler.regexCount ++; if ( regex.test(match[2]) ) { apps.push(app); - break next; + continue appLoop; } } break; case 'meta': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } profiler.regexCount ++; - while ( match = new RegExp(']+>', 'ig').exec(data['html']) ) { + regexMeta = new RegExp(']+>', 'ig'); + + while ( match = regexMeta.exec(data.html) ) { for ( meta in w.apps[app][type] ) { profiler.regexCount ++; if ( new RegExp('name=["\']' + meta + '["\']', 'i').test(match) ) { content = match.toString().match(/content=("|')([^"']+)("|')/i); - regex = new RegExp(w.apps[app].meta[meta], 'i'); + regex = new RegExp(w.apps[app].meta[meta].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( content && content.length === 4 && regex.test(content[2]) ) { apps.push(app); - break next; + continue appLoop; } } } @@ -205,14 +207,14 @@ var wappalyzer = wappalyzer || (function() { } for ( header in w.apps[app].headers ) { - regex = new RegExp(w.apps[app][type][header], 'i'); + regex = new RegExp(w.apps[app][type][header].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( data[type][header] != null && regex.test(data[type][header]) ) { apps.push(app); - break next; + continue appLoop; } } @@ -222,7 +224,7 @@ var wappalyzer = wappalyzer || (function() { break; } - regex = RegExp(w.apps[app][type], 'i'); + regex = RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); for ( i in data[type] ) { profiler.regexCount ++; @@ -230,7 +232,7 @@ var wappalyzer = wappalyzer || (function() { if ( regex.test(data[type][i]) ) { apps.push(app); - break next; + continue appLoop; } } diff --git a/drivers/bookmarklet/json b/drivers/bookmarklet/json index c995b007b..cb478903e 100644 --- a/drivers/bookmarklet/json +++ b/drivers/bookmarklet/json @@ -767,7 +767,7 @@ }, "jQuery": { "cats": [ "12" ], - "script": "jquery.*.js", + "script": "jquery.*\\.js", "env": "^jQuery$" }, "jQuery Mobile": { diff --git a/drivers/chrome/apps.json b/drivers/chrome/apps.json index c995b007b..cb478903e 100644 --- a/drivers/chrome/apps.json +++ b/drivers/chrome/apps.json @@ -767,7 +767,7 @@ }, "jQuery": { "cats": [ "12" ], - "script": "jquery.*.js", + "script": "jquery.*\\.js", "env": "^jQuery$" }, "jQuery Mobile": { diff --git a/drivers/chrome/js/driver.js b/drivers/chrome/js/driver.js index 8115396d5..90fb825ca 100644 --- a/drivers/chrome/js/driver.js +++ b/drivers/chrome/js/driver.js @@ -87,7 +87,11 @@ break; case 'get_apps': - sendResponse({ tabCache: tabCache[request.tab.id] }); + sendResponse({ + tabCache: tabCache[request.tab.id], + apps: w.apps, + categories: w.categories + }); break; } @@ -141,7 +145,7 @@ appName = w.detected[tab.url][i]; w.apps[appName].cats.map(function(cat) { - if ( cat === match && !found ) { + if ( cat == match && !found ) { chrome.browserAction.setIcon({ tabId: tab.id, path: 'images/icons/' + appName + '.png' }); found = true; @@ -154,7 +158,6 @@ }; }, - /** * Anonymously track detected applications for research purposes */ diff --git a/drivers/chrome/js/popup.js b/drivers/chrome/js/popup.js index 1c71449fc..e9972ee09 100644 --- a/drivers/chrome/js/popup.js +++ b/drivers/chrome/js/popup.js @@ -1,5 +1,3 @@ -var wappalyzer = {}; - (function() { var popup = { pollHeaders: null, @@ -58,9 +56,9 @@ var wappalyzer = {}; '' + appName + '' + ''; - wappalyzer.apps[appName].cats.map(function(cat) { + response.apps[appName].cats.map(function(cat) { html += - '' + + '' + '' + chrome.i18n.getMessage('categoryName' + cat) + '' + ''; }); diff --git a/drivers/chrome/js/wappalyzer.js b/drivers/chrome/js/wappalyzer.js index edcd0192e..2b6db1236 100644 --- a/drivers/chrome/js/wappalyzer.js +++ b/drivers/chrome/js/wappalyzer.js @@ -105,7 +105,7 @@ var wappalyzer = wappalyzer || (function() { } var - i, app, type, regex, match, content, meta, header, + i, app, type, regex, regexMeta, regexScript, match, content, meta, header, profiler = { regexCount: 0, startTime: ( new Date ).getTime() @@ -113,86 +113,88 @@ var wappalyzer = wappalyzer || (function() { apps = [] ; + appLoop: for ( app in w.apps ) { // Skip if the app has already been detected if ( w.detected[url].indexOf(app) !== -1 || apps.indexOf(app) !== -1 ) { continue; } - next: - for ( type in w.apps[app] ) { - if ( data[type] == null ) { - continue; - } - switch ( type ) { case 'url': - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(url) ) { apps.push(app); - break next; + continue appLoop; } break; case 'html': - regex = new RegExp(w.apps[app][type], 'i'); + if ( data[type] == null ) { + break; + } + + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(data[type]) ) { apps.push(app); - break next; + continue appLoop; } break; case 'script': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); + regexScript = new RegExp(']+src=("|\')([^"\']+)\1', 'ig'); profiler.regexCount ++; - while ( match = new RegExp(']+src=("|\')([^"\']+)\1', 'ig').exec(data['html']) ) { + while ( match = regexScript.exec(data.html) ) { profiler.regexCount ++; if ( regex.test(match[2]) ) { apps.push(app); - break next; + continue appLoop; } } break; case 'meta': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } profiler.regexCount ++; - while ( match = new RegExp(']+>', 'ig').exec(data['html']) ) { + regexMeta = new RegExp(']+>', 'ig'); + + while ( match = regexMeta.exec(data.html) ) { for ( meta in w.apps[app][type] ) { profiler.regexCount ++; if ( new RegExp('name=["\']' + meta + '["\']', 'i').test(match) ) { content = match.toString().match(/content=("|')([^"']+)("|')/i); - regex = new RegExp(w.apps[app].meta[meta], 'i'); + regex = new RegExp(w.apps[app].meta[meta].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( content && content.length === 4 && regex.test(content[2]) ) { apps.push(app); - break next; + continue appLoop; } } } @@ -205,14 +207,14 @@ var wappalyzer = wappalyzer || (function() { } for ( header in w.apps[app].headers ) { - regex = new RegExp(w.apps[app][type][header], 'i'); + regex = new RegExp(w.apps[app][type][header].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( data[type][header] != null && regex.test(data[type][header]) ) { apps.push(app); - break next; + continue appLoop; } } @@ -222,7 +224,7 @@ var wappalyzer = wappalyzer || (function() { break; } - regex = RegExp(w.apps[app][type], 'i'); + regex = RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); for ( i in data[type] ) { profiler.regexCount ++; @@ -230,7 +232,7 @@ var wappalyzer = wappalyzer || (function() { if ( regex.test(data[type][i]) ) { apps.push(app); - break next; + continue appLoop; } } diff --git a/drivers/firefox-jetpack/data/apps.json b/drivers/firefox-jetpack/data/apps.json index c995b007b..cb478903e 100644 --- a/drivers/firefox-jetpack/data/apps.json +++ b/drivers/firefox-jetpack/data/apps.json @@ -767,7 +767,7 @@ }, "jQuery": { "cats": [ "12" ], - "script": "jquery.*.js", + "script": "jquery.*\\.js", "env": "^jQuery$" }, "jQuery Mobile": { diff --git a/drivers/firefox-jetpack/lib/wappalyzer.js b/drivers/firefox-jetpack/lib/wappalyzer.js index edcd0192e..2b6db1236 100644 --- a/drivers/firefox-jetpack/lib/wappalyzer.js +++ b/drivers/firefox-jetpack/lib/wappalyzer.js @@ -105,7 +105,7 @@ var wappalyzer = wappalyzer || (function() { } var - i, app, type, regex, match, content, meta, header, + i, app, type, regex, regexMeta, regexScript, match, content, meta, header, profiler = { regexCount: 0, startTime: ( new Date ).getTime() @@ -113,86 +113,88 @@ var wappalyzer = wappalyzer || (function() { apps = [] ; + appLoop: for ( app in w.apps ) { // Skip if the app has already been detected if ( w.detected[url].indexOf(app) !== -1 || apps.indexOf(app) !== -1 ) { continue; } - next: - for ( type in w.apps[app] ) { - if ( data[type] == null ) { - continue; - } - switch ( type ) { case 'url': - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(url) ) { apps.push(app); - break next; + continue appLoop; } break; case 'html': - regex = new RegExp(w.apps[app][type], 'i'); + if ( data[type] == null ) { + break; + } + + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(data[type]) ) { apps.push(app); - break next; + continue appLoop; } break; case 'script': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); + regexScript = new RegExp(']+src=("|\')([^"\']+)\1', 'ig'); profiler.regexCount ++; - while ( match = new RegExp(']+src=("|\')([^"\']+)\1', 'ig').exec(data['html']) ) { + while ( match = regexScript.exec(data.html) ) { profiler.regexCount ++; if ( regex.test(match[2]) ) { apps.push(app); - break next; + continue appLoop; } } break; case 'meta': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } profiler.regexCount ++; - while ( match = new RegExp(']+>', 'ig').exec(data['html']) ) { + regexMeta = new RegExp(']+>', 'ig'); + + while ( match = regexMeta.exec(data.html) ) { for ( meta in w.apps[app][type] ) { profiler.regexCount ++; if ( new RegExp('name=["\']' + meta + '["\']', 'i').test(match) ) { content = match.toString().match(/content=("|')([^"']+)("|')/i); - regex = new RegExp(w.apps[app].meta[meta], 'i'); + regex = new RegExp(w.apps[app].meta[meta].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( content && content.length === 4 && regex.test(content[2]) ) { apps.push(app); - break next; + continue appLoop; } } } @@ -205,14 +207,14 @@ var wappalyzer = wappalyzer || (function() { } for ( header in w.apps[app].headers ) { - regex = new RegExp(w.apps[app][type][header], 'i'); + regex = new RegExp(w.apps[app][type][header].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( data[type][header] != null && regex.test(data[type][header]) ) { apps.push(app); - break next; + continue appLoop; } } @@ -222,7 +224,7 @@ var wappalyzer = wappalyzer || (function() { break; } - regex = RegExp(w.apps[app][type], 'i'); + regex = RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); for ( i in data[type] ) { profiler.regexCount ++; @@ -230,7 +232,7 @@ var wappalyzer = wappalyzer || (function() { if ( regex.test(data[type][i]) ) { apps.push(app); - break next; + continue appLoop; } } diff --git a/drivers/firefox/content/apps.json b/drivers/firefox/content/apps.json index c995b007b..cb478903e 100644 --- a/drivers/firefox/content/apps.json +++ b/drivers/firefox/content/apps.json @@ -767,7 +767,7 @@ }, "jQuery": { "cats": [ "12" ], - "script": "jquery.*.js", + "script": "jquery.*\\.js", "env": "^jQuery$" }, "jQuery Mobile": { diff --git a/drivers/firefox/content/js/content.js b/drivers/firefox/content/js/content.js index 410ba565c..82bf29fce 100644 --- a/drivers/firefox/content/js/content.js +++ b/drivers/firefox/content/js/content.js @@ -4,38 +4,38 @@ var data = {}, lastEnv = [], - prefs = null + prefs = sendSyncMessage('wappalyzer', { action: 'get prefs' })[0] ; addEventListener('DOMContentLoaded', function() { removeEventListener('DOMContentLoaded', onLoad, false); - if ( prefs != null || content.document.contentType != 'text/html' ) { - return; - } - - prefs = sendSyncMessage('wappalyzer', { action: 'get prefs' })[0]; - onLoad(); }, false); function onLoad() { + if ( content.document.contentType != 'text/html' ) { + return; + } + if ( prefs.analyzeJavaScript && prefs.analyzeOnLoad ) { content.document.documentElement.addEventListener('load', function() { - var env = Object.keys(content.wrappedJSObject); - - // Only analyze new variables - data = { env: env.filter(function(i) { return lastEnv.indexOf(i) === -1; }) }; + var env = Object.keys(content.wrappedJSObject).slice(0, 500); lastEnv = env; - if ( data.env.length ) { + // Only analyze new variables + env = { env: env.filter(function(i) { return lastEnv.indexOf(i) === -1; }) }; + + if ( env.length ) { sendAsyncMessage('wappalyzer', { action: 'analyze', - analyze: data + analyze: { env: env } }); } + env = null; + removeEventListener('load', onLoad, true); }, true); } @@ -55,7 +55,9 @@ data = { html: html }; if ( prefs.analyzeJavaScript ) { - data.env = Object.keys(content.wrappedJSObject); + data.env = Object.keys(content.wrappedJSObject).slice(0, 500); + + lastEnv = data.env; } sendAsyncMessage('wappalyzer', { @@ -64,5 +66,7 @@ url: content.location.href, analyze: data }); + + data = null; } })(); diff --git a/drivers/firefox/content/js/wappalyzer.js b/drivers/firefox/content/js/wappalyzer.js index edcd0192e..2b6db1236 100644 --- a/drivers/firefox/content/js/wappalyzer.js +++ b/drivers/firefox/content/js/wappalyzer.js @@ -105,7 +105,7 @@ var wappalyzer = wappalyzer || (function() { } var - i, app, type, regex, match, content, meta, header, + i, app, type, regex, regexMeta, regexScript, match, content, meta, header, profiler = { regexCount: 0, startTime: ( new Date ).getTime() @@ -113,86 +113,88 @@ var wappalyzer = wappalyzer || (function() { apps = [] ; + appLoop: for ( app in w.apps ) { // Skip if the app has already been detected if ( w.detected[url].indexOf(app) !== -1 || apps.indexOf(app) !== -1 ) { continue; } - next: - for ( type in w.apps[app] ) { - if ( data[type] == null ) { - continue; - } - switch ( type ) { case 'url': - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(url) ) { apps.push(app); - break next; + continue appLoop; } break; case 'html': - regex = new RegExp(w.apps[app][type], 'i'); + if ( data[type] == null ) { + break; + } + + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( regex.test(data[type]) ) { apps.push(app); - break next; + continue appLoop; } break; case 'script': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } - regex = new RegExp(w.apps[app][type], 'i'); + regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); + regexScript = new RegExp(']+src=("|\')([^"\']+)\1', 'ig'); profiler.regexCount ++; - while ( match = new RegExp(']+src=("|\')([^"\']+)\1', 'ig').exec(data['html']) ) { + while ( match = regexScript.exec(data.html) ) { profiler.regexCount ++; if ( regex.test(match[2]) ) { apps.push(app); - break next; + continue appLoop; } } break; case 'meta': - if ( data['html'] == null ) { + if ( data.html == null ) { break; } profiler.regexCount ++; - while ( match = new RegExp(']+>', 'ig').exec(data['html']) ) { + regexMeta = new RegExp(']+>', 'ig'); + + while ( match = regexMeta.exec(data.html) ) { for ( meta in w.apps[app][type] ) { profiler.regexCount ++; if ( new RegExp('name=["\']' + meta + '["\']', 'i').test(match) ) { content = match.toString().match(/content=("|')([^"']+)("|')/i); - regex = new RegExp(w.apps[app].meta[meta], 'i'); + regex = new RegExp(w.apps[app].meta[meta].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( content && content.length === 4 && regex.test(content[2]) ) { apps.push(app); - break next; + continue appLoop; } } } @@ -205,14 +207,14 @@ var wappalyzer = wappalyzer || (function() { } for ( header in w.apps[app].headers ) { - regex = new RegExp(w.apps[app][type][header], 'i'); + regex = new RegExp(w.apps[app][type][header].replace('/', '\\\/'), 'i'); profiler.regexCount ++; if ( data[type][header] != null && regex.test(data[type][header]) ) { apps.push(app); - break next; + continue appLoop; } } @@ -222,7 +224,7 @@ var wappalyzer = wappalyzer || (function() { break; } - regex = RegExp(w.apps[app][type], 'i'); + regex = RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); for ( i in data[type] ) { profiler.regexCount ++; @@ -230,7 +232,7 @@ var wappalyzer = wappalyzer || (function() { if ( regex.test(data[type][i]) ) { apps.push(app); - break next; + continue appLoop; } } diff --git a/drivers/html/apps.json b/drivers/html/apps.json index c995b007b..cb478903e 100644 --- a/drivers/html/apps.json +++ b/drivers/html/apps.json @@ -767,7 +767,7 @@ }, "jQuery": { "cats": [ "12" ], - "script": "jquery.*.js", + "script": "jquery.*\\.js", "env": "^jQuery$" }, "jQuery Mobile": { diff --git a/drivers/html/js/driver.js b/drivers/html/js/driver.js index a54fb1bdc..085d2223f 100644 --- a/drivers/html/js/driver.js +++ b/drivers/html/js/driver.js @@ -15,6 +15,22 @@ * Initialize */ init: function() { + // Load apps.json + var xhr = new XMLHttpRequest(); + + xhr.open('GET', 'apps.json', true); + + xhr.overrideMimeType('application/json'); + + xhr.onload = function() { + var json = JSON.parse(xhr.responseText); + + w.categories = json.categories; + w.apps = json.apps; + }; + + xhr.send(null); + window.document.addEventListener('DOMContentLoaded', function() { w.analyze('google.com', 'http://google.com', { html: '