From 512b36aa23ee2be04ae1a2b4220355710e2e3214 Mon Sep 17 00:00:00 2001 From: Elbert Alias Date: Sun, 1 Mar 2015 12:35:06 +1100 Subject: [PATCH] Added regex validation script, improved patterns --- bin/wappalyzer-validate | 4 ++ bin/wappalyzer-validate-icons | 7 +- bin/wappalyzer-validate-regex | 50 +++++++++++++ docker/node/package.json | 1 - src/apps.json | 130 +++++++++++++++------------------- 5 files changed, 116 insertions(+), 76 deletions(-) create mode 100755 bin/wappalyzer-validate-regex diff --git a/bin/wappalyzer-validate b/bin/wappalyzer-validate index 41018c211..a6e8c691e 100755 --- a/bin/wappalyzer-validate +++ b/bin/wappalyzer-validate @@ -29,6 +29,10 @@ echo "Validating apps.json..." jsonlint --quiet -V $WAPPALYZER_ROOT/schema.json $path/apps.json +echo "Validating regular expressions..." + +wappalyzer validate-regex + echo "Validating icons..." wappalyzer validate-icons diff --git a/bin/wappalyzer-validate-icons b/bin/wappalyzer-validate-icons index 49676f0f0..847d1d29c 100755 --- a/bin/wappalyzer-validate-icons +++ b/bin/wappalyzer-validate-icons @@ -1,13 +1,14 @@ #!/usr/bin/env node var + app, modulesPath = process.env.WAPPALYZER_NODE_PATH !== undefined ? process.env.WAPPALYZER_NODE_PATH + '/node_modules/' : '', + fs = require('fs'), fileType = require(modulesPath + 'file-type'), - fs = require(modulesPath + 'fs-extra'), readChunk = require(modulesPath + 'read-chunk') json = require(process.env.WAPPALYZER_ROOT + '/src/apps.json'); -Object.keys(json.apps).forEach(function(app) { +for ( app in json.apps ) { var path = process.env.WAPPALYZER_ROOT + '/src/icons/' + app + '.png'; fs.exists(path, function(exists) { @@ -23,4 +24,4 @@ Object.keys(json.apps).forEach(function(app) { throw new Error('Missing file: src/icons/' + app + '.png'); } }); -}); +}; diff --git a/bin/wappalyzer-validate-regex b/bin/wappalyzer-validate-regex new file mode 100755 index 000000000..2f72564e1 --- /dev/null +++ b/bin/wappalyzer-validate-regex @@ -0,0 +1,50 @@ +#!/usr/bin/env node + +var + app, + modulesPath = process.env.WAPPALYZER_NODE_PATH !== undefined ? process.env.WAPPALYZER_NODE_PATH + '/node_modules/' : '', + json = require(process.env.WAPPALYZER_ROOT + '/src/apps.json'); + +for ( app in json.apps ) { + ['headers', 'html', 'env', 'meta', 'script'].forEach(function(type) { + var + key, + patterns = json.apps[app][type]; + + if ( patterns !== undefined ) { + patterns = typeof patterns === 'string' ? [patterns] : patterns; + + if ( !( patterns instanceof Array ) ) { + patterns = []; + + for ( key in json.apps[app][type] ) { + patterns.push(json.apps[app][type][key]); + } + } + + patterns.forEach(function(pattern) { + var + attrs = pattern.split('\\;'), + regex = '/' + attrs.shift().replace('/', '\/') + '/'; + + if ( /^\/(?:\^\$|\.\+|\.\*)\/$/.test(regex) ) { + throw new Error('Pattern should be replaced with empty string.\n' + app + ': ' + type + ': ' + pattern); + } + + if ( type === 'html' ) { + if ( /\.(?:\+|\*)/.test(regex) ) { + throw new Error('Avoid ".+" and ".*" in HTML patterns. Consider using "[^>]+" or "[^<]+" instead.\n' + app + ': ' + type + ': ' + pattern); + } + + if ( !/[<>]/.test(regex) ) { + throw new Error('HTML patterns must contain "<" or ">".\n' + app + ': ' + type + ': ' + pattern); + } + } + }); + } + }); + + if ( /[a-z]+:\/\//i.test(json.apps[app].website) ) { + throw new Error('Do not include the protocol in the website URL\n' + app + ': ' + json.apps[app].website); + } +} diff --git a/docker/node/package.json b/docker/node/package.json index db55543bb..dec35339d 100644 --- a/docker/node/package.json +++ b/docker/node/package.json @@ -1,7 +1,6 @@ { "devDependencies": { "file-type": "2.2.*", - "fs-extra": "0.16.*", "read-chunk": "1.0.*" } } diff --git a/src/apps.json b/src/apps.json index 6448eb852..f76da82af 100755 --- a/src/apps.json +++ b/src/apps.json @@ -187,11 +187,10 @@ ], "html": [ "
]+data-component-path=\"[^\"+]jcr:" ], "implies": "Java", + "script": "/etc/designs/", "website": "adobe.com/products/cq.html" }, "Adobe ColdFusion": { @@ -332,7 +331,6 @@ "cats": [ 1 ], - "html": "system_(?:js\\.php\\?script=|css\\.php\\?styles)[^\"]+cv=([\\d.]+)\\;version:\\1", "implies": "PHP", "meta": { "generator": "Amiro" @@ -378,14 +376,14 @@ "cats": [ 34 ], - "html": "type=\"text/css\" href=\"/static/hbase\\.css\"", + "html": "]+static/hbase", "website": "hbase.apache.org" }, "Apache Hadoop": { "cats": [ 34 ], - "html": "type=\"text/css\" href=\"/static/hadoop\\.css\"", + "html": "]+static/hadoop", "website": "hadoop.apache.org" }, "Apache JSPWiki": { @@ -436,7 +434,7 @@ "cats": [ 2 ], - "html": "ping\\.src = node\\.href;", + "html": "ping\\.src = node\\.href;\\s+[^>]+\\s+}\\s+", "website": "arclanguage.org" }, "Artifactory": { @@ -667,7 +665,7 @@ ], "implies": "Google Analytics", "script": "boba(\\.min)?\\.js", - "website": "http://boba.space150.com/" + "website": "boba.space150.com" }, "Bolt": { "cats": [ @@ -734,7 +732,7 @@ "cats": [ 2 ], - "html": "]+woltlab\\.com.+Burning Board", + "html": "]+woltlab\\.com[^<]+Burning Board", "implies": "PHP", "website": "www.woltlab.com" }, @@ -824,7 +822,7 @@ "env": "^fn_compare_strings$", "html": [ " Powered by (?:]+cs-cart\\.com|CS-Cart)", - "(?:\\$|jQuery)\\.runCart\\('\\w'\\)" + ".cm-noscript[^>]+" ], "implies": "PHP", "website": "www.cs-cart.com" @@ -952,7 +950,7 @@ "cats": [ 10 ], - "html": "function loadChartbeat\\(\\) \\{", + "env": "^_sf_(?:endpt|async_config)$", "script": "chartbeat\\.js", "website": "chartbeat.com" }, @@ -1014,7 +1012,6 @@ 10 ], "env": "^ClickTale", - "html": "if\\(typeof ClickTale\\(Tag\\)*==\\\"function\\\"\\)", "website": "www.clicktale.com" }, "Clicky": { @@ -1286,8 +1283,7 @@ 1 ], "html": [ - "]+>Site Powered by DTG", - "var u=\\(\\('https:' == d\\.location\\.protocol\\) \\? 'https://resellerstat\\.mono\\.net/dtg/' : 'http://resellerstat\\.mono\\.net/dtg/'\\);" + "]+Site Powered by DTG" ], "implies": "Mono.net", "website": "www.dtg.nl" @@ -1561,7 +1557,7 @@ "cats": [ 6 ], - "html": "(?:id=\"block[_-]commerce[_-]cart[_-]cart|class=\"commerce[_-]product[_-]field)", + "html": "<[^]+(?:id=\"block[_-]commerce[_-]cart[_-]cart|class=\"commerce[_-]product[_-]field)", "implies": "Drupal", "website": "drupalcommerce.org" }, @@ -1722,10 +1718,10 @@ "cats": [ 25 ], - "html": "]+?href=\"[^\"]+epoch(?:\\.min)?\\.css", "implies": "D3", "script": "epoch(\\.min)?\\.js", - "website": "https://fastly.github.io/epoch/" + "website": "fastly.github.io/epoch" }, "Epom": { "cats": [ @@ -2068,7 +2064,7 @@ "meta": { "description": "GitLab Continuous Integration" }, - "website": "https://about.gitlab.com/gitlab-ci/" + "website": "about.gitlab.com/gitlab-ci" }, "GlassFish": { "cats": [ @@ -2129,8 +2125,7 @@ "headers": { "Set-Cookie": "__utma" }, - "html": "_gaq\\.push\\(\\['_setAccount|i\\['GoogleAnalyticsObject'\\]|ga\\.async = true", - "script": "^https?://[^\\/]+\\.google-analytics\\.com\\/(?:ga|urchin|(analytics))\\.js\\;version:\\1?Universal Analytics:", + "script": "^https?://[^\\/]+\\.google-analytics\\.com\\/(?:ga|urchin|(analytics))\\.js\\;version:\\1?UA:", "website": "google.com/analytics" }, "Google App Engine": { @@ -2250,7 +2245,7 @@ 19 ], "env": "^Gravatar$", - "html": "gravatar\\.com/avatar/", + "html": "<[^]+gravatar\\.com/avatar/", "website": "gravatar.com" }, "Gravity Insights": { @@ -2344,7 +2339,7 @@ ], "env": "^Hammer$", "script": "hammer(\\.min)?\\.js", - "website": "http://hammerjs.github.io" + "website": "hammerjs.github.io" }, "Handlebars": { "cats": [ @@ -2384,8 +2379,8 @@ "cats": [ 5 ], - "html": "/hellobar\\.js", - "script": "(?:hellobar\\.com/hellobar\\.js|new HelloBar)", + "env": "^HelloBar$", + "script": "hellobar\\.js", "website": "hellobar.com" }, "Hiawatha": { @@ -2459,7 +2454,7 @@ "headers": { "Set-Cookie": "_hybris" }, - "html": "(?:/sys_master/|/hybr/|/_ui/desktop/)", + "html": "<[^]+(?:/sys_master/|/hybr/|/_ui/desktop/)", "implies": "Java", "website": "hybris.com" }, @@ -3070,7 +3065,7 @@ 12 ], "script": "lazy(\\.browser)?(\\.min)?\\.js", - "website": "http://danieltao.com/lazy.js/" + "website": "danieltao.com/lazy.js" }, "Leaflet": { "cats": [ @@ -3168,10 +3163,10 @@ "cats": [ 1 ], + "env": "^LIVESTREET", "headers": { "X-Powered-By": "LiveStreet CMS" }, - "html": "var LIVESTREET_SECURITY_KEY", "website": "livestreetcms.com" }, "Livefyre": { @@ -3206,7 +3201,7 @@ "cats": [ 1 ], - "html": "]*/sites/[a-z\\d]{24}/theme/stylesheets/.*>", + "html": "]*/sites/[a-z\\d]{24}/theme/stylesheets", "implies": [ "Ruby on Rails", "MongoDB" @@ -3330,7 +3325,6 @@ "cats": [ 5 ], - "html": "\\/assets\\/js\\/manycontacts\\.min\\.js", "script": "\\/assets\\/js\\/manycontacts\\.min\\.js", "website": "www.manycontacts.com" }, @@ -3450,7 +3444,7 @@ "cats": [ 2 ], - "html": "]+minibb.+\\s+