diff --git a/.gitattributes b/.gitattributes index 87000817f..41aa20c96 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,19 +1,19 @@ # End-of-line normalization * text=auto -*.cmd text eol=lf -*.sh text eol=lf -*.md text eol=lf -*.json text eol=lf -*.js text eol=lf -*.html text eol=lf -*.css text eol=lf -*.manifest text eol=lf -*.rdf text eol=lf -*.xul text eol=lf -*.dtd text eol=lf -*.properties text eol=lf -*.php text eol=lf +*.cmd text eol=lf +*.sh text eol=lf +*.md text eol=lf +*.json text eol=lf +*.js text eol=lf +*.html text eol=lf +*.css text eol=lf +*.manifest text eol=lf +*.rdf text eol=lf +*.xul text eol=lf +*.dtd text eol=lf +*.properties text eol=lf +*.php text eol=lf -*.png binary -*.gif binary +*.png binary +*.gif binary diff --git a/README.md b/README.md index 190a5c9cd..43a0cfa9f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -Wappalyzer -========== +# Wappalyzer [Wappalyzer](http://wappalyzer.com/) is a [browser extension](http://wappalyzer.com/download) that uncovers the @@ -11,44 +10,84 @@ technologies used on websites. It detects [analytics tools](http://wappalyzer.com/categories/analytics) and [many more](http://wappalyzer.com/applications). +*Licensed under the [GPL](https://github.com/ElbertF/Wappalyzer/blob/master/LICENSE).* -Contributing ------------- +## Contributing -**Adding a new application** +### Adding a new application -* Edit `share/apps.json` and use a validator like http://jsonformatter.curiousconcept.com/ to verify your modification. -* Add a 16x16 PNG image to `share/images/icons` matching the application name and compressed with a loss-less tools like http://www.smushit.com/ or optipng http://optipng.sourceforge.net/. +* Edit `share/apps.json` (use a JSON + [validator](http://jsonformatter.curiousconcept.com)). +* Add a 16x16 PNG image to `share/images/icons` matching the application name + (use [Smush.it](http://www.smushit.com) or + [OptiPNG](http://optipng.sourceforge.net) for compression). * Provide the URL to the application's website when submitting a pull request. -Example: + +### Adding a new category + +Please open an issue to discuss first. Adding a category involves updating `apps.json`, +preference pages, locales and [wappalyzer.com](http://wappalyzer.com). + + +### Adding a new translation + +#### Mozilla Firefox + +Copy `drivers/firefox/locale/en-US`. + + +#### Google Chrome + +Copy `drivers/chrome/_locales/en`. + + +## apps.json + +#### Example ```javascript "Application Name": { - cats: [ "1" ], - headers: { "X-Powered-By": "Application Name" }, - url: ".+\\.application-name\\.com", - html: "]application-name\\.css", - meta: { "generator": "Application Name" }, - script: "application-name\\.js", - env: "ApplicationName", - implies: [ "PHP" ] + "cats": [ 1 ], + "headers": { "X-Powered-By": "Application Name" }, + "url": ".+\\.application-name\\.com", + "html": "]application-name\\.css", + "meta": { "generator": "Application Name" }, + "script": "application-name\\.js", + "env": "ApplicationName", + "implies": [ "PHP" ], + "confidence": { "html": 50, "script": 50 } } ``` +### JSON fields -Drivers -------- +field | type | description +-----------|--------|----------------------- +cats | array | List of category IDs. See [apps.json](https://github.com/ElbertF/Wappalyzer/blob/master/share/apps.json) for the complete list. +confidence | object | Indicates less reliable patterns that may cause false positives. The aim is to achieve a combined confidence of 100%. Defaults to 100% for unspecified fields. +env | string | Global JavaScript variables, e.g. `jQuery`. +headers | object | HTTP Response headers, e.g. `X-Powered-By`. +html | string | Full HTML response body. +implies | array | The presence of one application can imply the presence of another, e.g. Drupal means PHP is also in use. +url | string | URL of the page, e.g. `http://wordpress.com/index.php`. +meta | object | HTML meta tags, e.g. `generator`. +script | string | `src` attribute of HTML script tags, e.g. `jquery.js`. + +Except `cats`, all fields are optional. + + +## Drivers Wappalyzer is multi-platform. The main code lives in the `share/` directory and platform specific code in `drivers/`. The sections below describe how to set up a development environment for the various existing drivers. -To keep files synchronised between drivers, run the `links.sh` script (UNIX-like -systems only, Windows users can use `links.cmd`.) +To keep files synchronised between drivers, run `links.sh` (UNIX-like systems) +or `links.cmd` (Windows). -**Mozilla Firefox** +### Mozilla Firefox * Place a file called `wappalyzer@crunchlabz.com` in the extensions directory in your [profile folder](http://kb.mozillazine.org/Profile_folder_-_Firefox) @@ -59,11 +98,7 @@ systems only, Windows users can use `links.cmd`.) * Ctrl+Shift+J brings up a console for debugging. -**Google Chrome** - -The Chrome version needs some love, if anyone wants to pick it up. It's -currently not as feature-rich as the Firefox add-on (although partially due to -API limitations.) +### Google Chrome * Navigate to `about:extensions` * Check "Developer mode" @@ -71,12 +106,13 @@ API limitations.) * Select `drivers/chrome/` -**Bookmarklet** +### Bookmarklet -Beta version available for testing at [wappalyzer.com/bookmarklet](http://wappalyzer.com/bookmarklet). +Beta version available for testing at +[wappalyzer.com/bookmarklet](http://wappalyzer.com/bookmarklet). -**HTML** +### HTML The HTML driver serves purely as an example. It's a good starting point if you want to port Wappalyzer to a new platform. @@ -84,10 +120,11 @@ want to port Wappalyzer to a new platform. * Navigate to `drivers/html/` -**PHP** +### PHP -The PHP driver requires the [V8js](http://php.net/manual/en/book.v8js.php) class. Installing V8js -using [PECL](http://pecl.php.net/) on Debian Linux or Ubuntu should be very straight forward: +The PHP driver requires the [V8js](http://php.net/manual/en/book.v8js.php) +class. Installing V8js using [PECL](http://pecl.php.net/) on Debian Linux or +Ubuntu should be very straight forward: * `# aptitude install php5-dev php-pear libv8-dev` * `# pecl install channel://pecl.php.net/v8js-0.1.3` @@ -109,53 +146,29 @@ $detectedApps = $wappalyzer->analyze(); ``` -**Mozilla Jetpack** +### Mozilla Jetpack Work in progress, experimental. See https://wiki.mozilla.org/Jetpack. -Unofficial drivers and ports ----------------------------- +## Unofficial drivers and ports -**Python** +### Python A Python driver by [@ebradbury](https://github.com/ebradbury). https://github.com/ebradbury/Wappalyzer/tree/master/drivers/python -**Ruby** +### Ruby A Ruby port by [@skroutz](https://github.com/skroutz). https://github.com/skroutz/wappalyzer-ruby -Screenshot ----------- +## Screenshot Wappalyzer on Firefox: -![Screenshot](http://wappalyzer.com/sites/default/themes/wappalyzer/images/installed.png) - - -License -------- ->Wappalyzer -> ->Copyright © 2012 Elbert Alias and Wappalyzer project contributors -> ->This program is free software: you can redistribute it and/or modify ->it under the terms of the GNU General Public License as published by ->the Free Software Foundation, either version 3 of the License, or ->(at your option) any later version. -> ->This program is distributed in the hope that it will be useful, ->but WITHOUT ANY WARRANTY; without even the implied warranty of ->MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ->GNU General Public License for more details. -> ->You should have received a copy of the GNU General Public License ->along with this program. If not, see . - -A copy of the license is available in the `LICENSE` file. \ No newline at end of file +![A screenshot of Wappalyzer on Firefox](http://wappalyzer.com/sites/default/themes/wappalyzer/images/installed.png) \ No newline at end of file diff --git a/drivers/bookmarklet/js/wappalyzer.js b/drivers/bookmarklet/js/wappalyzer.js index b3f81db41..e61da003b 100644 --- a/drivers/bookmarklet/js/wappalyzer.js +++ b/drivers/bookmarklet/js/wappalyzer.js @@ -28,10 +28,10 @@ var wappalyzer = (function() { * Main script */ var w = { - apps: null, + apps: {}, cats: null, - ping: {}, - detected: [], + ping: { hostnames: {} }, + detected: {}, config: { environment: 'dev', // dev | live @@ -90,6 +90,15 @@ var wappalyzer = (function() { * Analyze the request */ analyze: function(hostname, url, data) { + var + i, j, app, confidence, type, regex, regexMeta, regexScript, match, content, meta, header, + profiler = { + regexCount: 0, + startTime: new Date().getTime() + }, + apps = [] + ; + w.log('w.analyze'); url = url.split('#')[0]; @@ -103,26 +112,21 @@ var wappalyzer = (function() { } if ( typeof w.detected[url] === 'undefined' ) { - w.detected[url] = []; + w.detected[url] = {}; } - var - i, app, type, regex, regexMeta, regexScript, match, content, meta, header, - profiler = { - regexCount: 0, - startTime: new Date().getTime() - }, - apps = [] - ; - appLoop: for ( app in w.apps ) { // Skip if the app has already been detected - if ( w.detected[url].indexOf(app) !== -1 || apps.indexOf(app) !== -1 ) { + if ( w.detected[url].hasOwnProperty(app) || apps.indexOf(app) !== -1 ) { continue; } for ( type in w.apps[app] ) { + confidence = {}; + + confidence[type] = w.apps[app].hasOwnProperty('confidence') && w.apps[app].confidence.hasOwnProperty(type) ? w.apps[app].confidence[type] : 100; + switch ( type ) { case 'url': regex = new RegExp(w.apps[app][type].replace('/', '\\\/'), 'i'); @@ -130,7 +134,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(url) ) { - apps.push(app); + apps[app] = confidence; continue appLoop; } @@ -146,7 +150,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(data[type]) ) { - apps.push(app); + apps[app] = confidence; continue appLoop; } @@ -166,7 +170,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(match[2]) ) { - apps.push(app); + apps[app] = confidence; continue appLoop; } @@ -194,7 +198,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( content && content.length === 4 && regex.test(content[2]) ) { - apps.push(app); + apps[app] = confidence; continue appLoop; } @@ -214,7 +218,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( typeof data[type][header] === 'string' && regex.test(data[type][header]) ) { - apps.push(app); + apps[app] = confidence; continue appLoop; } @@ -232,7 +236,7 @@ var wappalyzer = (function() { profiler.regexCount ++; if ( regex.test(data[type][i]) ) { - apps.push(app); + apps[app] = confidence; continue appLoop; } @@ -246,59 +250,82 @@ var wappalyzer = (function() { w.log('Tested ' + profiler.regexCount + ' regular expressions in ' + ( ( ( new Date ).getTime() - profiler.startTime ) / 1000 ) + 's'); // Implied applications - var i, j, k, implied; - + // Run several passes as implied apps may imply other apps for ( i = 0; i < 3; i ++ ) { - for ( j in apps ) { - if ( w.apps[apps[j]] && w.apps[apps[j]].implies ) { - for ( k in w.apps[apps[j]].implies ) { - implied = w.apps[apps[j]].implies[k]; + for ( app in apps ) { + confidence = apps[app]; + if ( w.apps[app] && w.apps[app].implies ) { + w.apps[app].implies.map(function(implied) { if ( !w.apps[implied] ) { w.log('Implied application ' + implied + ' does not exist'); - continue; + return; } - if ( w.detected[url].indexOf(implied) === -1 && apps.indexOf(implied) === -1 ) { - apps.push(implied); + // Apply app confidence to implied app + if ( !apps.hasOwnProperty(implied) ) { + apps[implied] = {}; } - } + + for ( type in confidence ) { + if ( !apps[implied].hasOwnProperty(type + ' implied by ' + app) ) { + apps[implied][type + ' implied by ' + app] = confidence[type]; + } + } + }); } } } - w.log(apps.length + ' apps detected: ' + apps.join(', ') + ' on ' + url); + w.log(Object.keys(apps).length + ' apps detected: ' + Object.keys(apps).join(', ') + 'on ' + url); // Keep history of detected apps - var i, app, regex, regexMeta, match; + for ( app in apps ) { + confidence = apps[app]; - for ( i in apps ) { - app = apps[i]; + // Per URL + if ( !w.detected[url].hasOwnProperty(app)) { + w.detected[url][app] = {}; + } - // Per hostname - if ( /^[a-z0-9._\-]+\.[a-z]+/.test(hostname) && !/((local|dev|development|stage|staging|test|testing|demo|admin)\.|\/admin|\.local)/.test(url) ) { - if ( typeof w.ping.hostnames === 'undefined' ) { - w.ping.hostnames = {}; - } + for ( type in confidence ) { + w.detected[url][app][type] = confidence[type]; + } - if ( typeof w.ping.hostnames[hostname] === 'undefined' ) { - w.ping.hostnames[hostname] = { applications: {}, meta: {} }; - } + // Calculate confidence total + w.detected[url][app].total = 0; - if ( typeof w.ping.hostnames[hostname].applications[app] === 'undefined' ) { - w.ping.hostnames[hostname].applications[app] = 1; - } + for ( type in w.detected[url][app] ) { + if ( type !== 'total' ) { + w.detected[url][app].total += w.detected[url][app][type]; - w.ping.hostnames[hostname].applications[app] ++; + w.detected[url][app].total = Math.min(w.detected[url][app].total, 100); + } } - // Per URL - if ( w.detected[url].indexOf(app) === -1 ) { w.detected[url].push(app); } + if ( w.detected[url][app].total >= 100 ) { + // Per hostname + if ( /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/.test(hostname) && !/((local|dev(elopment)?|stag(e|staging)?|test(ing)?|demo(shop)?|admin)\.|\/admin|\.local)/.test(url) ) { + if ( !w.ping.hostnames.hasOwnProperty(hostname) ) { + w.ping.hostnames[hostname] = { applications: {}, meta: {} }; + } + + if ( !w.ping.hostnames[hostname].applications.hasOwnProperty(app) ) { + w.ping.hostnames[hostname].applications[app] = 1; + } + + w.ping.hostnames[hostname].applications[app] ++; + } else { + w.log('Ignoring hostname "' + hostname + '"'); + } + } } + w.log(JSON.stringify(w.detected)); + // Additional information - if ( typeof w.ping.hostnames !== 'undefined' && typeof w.ping.hostnames[hostname] !== 'undefined' ) { + if ( w.ping.hostnames.hasOwnProperty(hostname) ) { if ( typeof data.html === 'string' && data.html ) { match = data.html.match(/]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i); @@ -322,7 +349,7 @@ var wappalyzer = (function() { w.log(hostname + ': ' + JSON.stringify(w.ping.hostnames[hostname])); } - if ( typeof w.ping.hostnames === 'object' && Object.keys(w.ping.hostnames).length >= 50 ) { driver('ping'); } + if ( Object.keys(w.ping.hostnames).length >= 50 ) { driver('ping'); } apps = null; data = null; diff --git a/drivers/bookmarklet/json b/drivers/bookmarklet/json index 3747a4c0a..c0b7a9aa7 100644 --- a/drivers/bookmarklet/json +++ b/drivers/bookmarklet/json @@ -813,7 +813,8 @@ "html": "(]+id=\\\"wrapper_r\\\"|<[^>]+(feed|components)/com_|]+class=\\\"pill)", "headers": { "X-Content-Encoded-By": "Joomla" }, "env": "^jcomments$", - "implies": [ "PHP" ] + "implies": [ "PHP" ], + "confidence": { "html": 50, "url": 50 } }, "jqPlot": { "cats": [ 25 ], @@ -930,7 +931,8 @@ "script": "(js/mage|skin/frontend/(default|enterprise))", "headers": { "Set-Cookie": "frontend=" }, "env": "^(Mage|VarienForm)$", - "implies": [ "PHP" ] + "implies": [ "PHP" ], + "confidence": { "headers": 50 } }, "Mambo": { "cats": [ 1 ], @@ -1380,8 +1382,9 @@ "cats": [ 18 ], "script": "/assets/application-[a-z\\d]{32}/\\.js", "meta": { "csrf-param": "authenticity_token" }, - "headers": { "Server": "(mod_rails|mod_rack|Phusion\\.Passenger)", "X-Powered-By": "(mod_rails|mod_rack|Phusion\\.Passenger)" }, - "implies": [ "Ruby" ] + "headers": { "Server": "(mod_rails|mod_rack|Phusion(\\.|_)Passenger)", "X-Powered-By": "(mod_rails|mod_rack|Phusion(\\.|_)Passenger)" }, + "implies": [ "Ruby" ], + "confidence": { "script": 50, "meta": 50, "headers": 50 } }, "S.Builder": { "cats": [ 1 ], @@ -1467,8 +1470,7 @@ "Snoobi": { "cats": [ 10 ], "script": "snoobi\\.com/snoop\\.php", - "env": "^snoobi$", - "implies": [ "Joomla" ] + "env": "^snoobi$" }, "SOBI 2": { "cats": [ 19 ], @@ -1582,7 +1584,7 @@ "Tumblr": { "cats": [ 11 ], "html": "