diff --git a/drivers/firefox/data/js/content.js b/drivers/firefox/data/js/content.js deleted file mode 100644 index 35559c4dd..000000000 --- a/drivers/firefox/data/js/content.js +++ /dev/null @@ -1,72 +0,0 @@ -(function() { - 'use strict'; - - var - data = {}, - lastEnv = [], - prefs = sendSyncMessage('wappalyzer', { action: 'get prefs' })[0] - ; - - addEventListener('DOMContentLoaded', function() { - removeEventListener('DOMContentLoaded', onLoad, false); - - onLoad(); - }, false); - - function onLoad() { - if ( content.document.contentType != 'text/html' ) { - return; - } - - if ( prefs.analyzeJavaScript && prefs.analyzeOnLoad ) { - content.document.documentElement.addEventListener('load', function() { - var env = Object.keys(content.wrappedJSObject).slice(0, 500); - - lastEnv = env; - - // Only analyze new variables - env = { env: env.filter(function(i) { return lastEnv.indexOf(i) === -1; }) }; - - if ( env.length ) { - sendAsyncMessage('wappalyzer', { - action: 'analyze', - analyze: { env: env } - }); - } - - env = null; - - removeEventListener('load', onLoad, true); - }, true); - } - - // HTML - var html = content.document.documentElement.outerHTML; - - // Comments outside HTML - //if ( content.document.lastChild.nodeType === 8 ) { - //content.alert(content.document.lastChild.nodeValue); - //} - - if ( html.length > 50000 ) { - html = html.substring(0, 25000) + html.substring(html.length - 25000, html.length); - } - - data = { html: html }; - - if ( prefs.analyzeJavaScript ) { - data.env = Object.keys(content.wrappedJSObject).slice(0, 500); - - lastEnv = data.env; - } - - sendAsyncMessage('wappalyzer', { - action: 'analyze', - hostname: content.location.hostname, - url: content.location.href, - analyze: data - }); - - data = null; - } -})(); diff --git a/drivers/firefox/data/js/tab.js b/drivers/firefox/data/js/tab.js index 18269eda5..4e573c3ea 100644 --- a/drivers/firefox/data/js/tab.js +++ b/drivers/firefox/data/js/tab.js @@ -2,9 +2,9 @@ var lastEnv = []; try { - if ( document && document.documentElement && document.contentType === 'text/html' ) { + if ( document && document.contentType === 'text/html' ) { var - html = document.documentElement.outerHTML + html = new XMLSerializer().serializeToString(document) env = []; self.port.emit('log', 'init'); diff --git a/drivers/python_raw/__init__.py b/drivers/python_raw/__init__.py new file mode 100644 index 000000000..df05229a7 --- /dev/null +++ b/drivers/python_raw/__init__.py @@ -0,0 +1,2 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- diff --git a/drivers/python_raw/real_test.py b/drivers/python_raw/real_test.py new file mode 100644 index 000000000..6130f9f5f --- /dev/null +++ b/drivers/python_raw/real_test.py @@ -0,0 +1,156 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +from wappalyzer import Wappalyzer + +TESTS = [ + { + 'url': 'http://www.hardgraft.com', + 'apps': ['jQuery', 'Shopify', 'Nginx'] + }, + { + 'url': 'http://its.bplaced.net', + 'apps': ['WordPress', 'jQuery', 'Apache'] + }, + { + 'url': 'http://www.bodybuilding.com/', + 'apps': ['jQuery', 'Optimizely', 'SiteCatalyst', 'Apache Tomcat'] + }, + { + 'url': 'http://guidedhelp21.weebly.com/', + 'apps': ['Weebly', 'Apache', 'Quantcast', 'Google Analytics', 'jQuery'] + }, + { + 'url': 'http://www.bancadelparque.com/', + 'apps': ['Wix', 'Twitter Bootstrap'] + }, + { + 'url': 'http://joomla.ru/', + 'apps': ['Joomla', 'jQuery', 'MooTools', 'Yandex.Metrika', 'LiteSpeed'] + }, + { + 'url': 'http://demoshop21.e-stile.ru/', + 'apps': ['SiteEdit', 'PHP'] + }, + { + 'url': 'http://umbraco.com', + 'apps': ['Umbraco', 'IIS', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://johnsciacca.webs.com/', + 'apps': ['Webs', 'RequireJS', 'Site Meter', 'Modernizr'] + }, + { + 'url': 'http://www.1c-bitrix.ru/', + 'apps': ['1C-Bitrix', 'Yandex.Metrika'] + }, + { + 'url': 'http://amirocms.com', + 'apps': ['Amiro.CMS'] + }, + { + 'url': 'http://dle-news.ru', + 'apps': ['DataLife Engine', 'CloudFlare'] + }, + { + 'url': 'http://dotnetnuke.com', + 'apps': ['DotNetNuke', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://www.schooldude.com', + 'apps': ['DotNetNuke', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://www.sportsdirect.com/', + 'apps': ['DotNetNuke', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://drupal.org', + 'apps': ['Drupal', 'Varnish'] + }, + { + 'url': 'http://www.komodocms.com/', + 'apps': ['Komodo CMS'] + }, + { + 'url': 'http://livestreetcms.com/', + 'apps': ['LiveStreet CMS'] + }, + { + 'url': 'http://modxcms.com/', + 'apps': ['MODx'] + }, + { + 'url': 'http://modx.ru/', + 'apps': ['MODx'] + }, + { + 'url': 'http://revo.modx.ru/', + 'apps': ['MODx'] + }, + { + 'url': 'http://www.punchbrand.com', + 'apps': ['CS Cart'] + }, + { + 'url': 'http://demo.cs-cart.com/', + 'apps': ['CS Cart'] + }, + { + 'url': 'https://livedemo.installatron.com/1404307206magento/', + 'apps': ['Magento'] + }, + { + 'url': 'http://livedemo.installatron.com/1404300689prestashop/', + 'apps': ['Prestashop'] + }, + { + 'url': 'http://demo.opencart.com/', + 'apps': ['OpenCart'] + }, + { + 'url': 'https://livedemo.installatron.com/1404307206oscommerce/', + 'apps': ['osCommerce'] + }, + { + 'url': 'http://www.ubercartdemo.com/', + 'apps': ['Ubercart'] + }, + { + 'url': 'http://demostore.x-cart.com/', + 'apps': ['X-Cart'] + }, + { + 'url': 'https://livedemo.installatron.com/1404307206zencart/', + 'apps': ['Zen Cart'] + }, + { + 'url': 'http://oreonfray83.wordpress.com', + 'apps': ['WordPress.Com'] + }, + { + 'url': 'http://www.try-phpbb.com/30x/', + 'apps': ['phpBB'] + }, +] + + +def test(): + wappalyzer = Wappalyzer(datafile_path='../../share/apps.json') + + for site in TESTS: + print 'testing %s ...' % site['url'] + result = wappalyzer.analyze(site['url']) + for app in site['apps']: + found = result.pop(app, None) + if found: + print '\t%s\t- ok\tconfidence=%d' % (app, found.get_confidence()) + else: + print '\t%s\t- NOT FOUND' % (app) + return + if result: + print '\tUNEXPECTED APPS:' + for app_name, app in result.iteritems(): + print '\t\t%s\t- ok\tconfidence=%d' % (app_name, app.get_confidence()) + +if __name__ == '__main__': + test() \ No newline at end of file diff --git a/drivers/python_raw/tests.py b/drivers/python_raw/tests.py new file mode 100644 index 000000000..51b8120b8 --- /dev/null +++ b/drivers/python_raw/tests.py @@ -0,0 +1,131 @@ +import re +import unittest +import wappalyzer + + +class FakeUrlopenResponse(object): + def __init__(self, url, html, headers): + self.url = url + self.html = html + self.headers = headers + + def read(self): + return self.html + + def info(self): + _cls = self + + class _Info: + @property + def dict(self): + return _cls.headers + + return _Info() + + +class WappalyzerCustomTestCase(unittest.TestCase): + def setUp(self): + self.wappalyzer = wappalyzer.Wappalyzer({'categories':[],'apps':[]}) + + def get_wappalyzer(self, categories, apps): + return wappalyzer.Wappalyzer({'categories': categories, 'apps': apps}) + + def test_parse_simple(self): + parsed = self.wappalyzer.parse_patterns('control/userimage\\.html') + self.assertEqual(1, len(parsed)) + self.assertTrue(hasattr(parsed[0].regex, 'search')) + + def test_parse_confidence_version(self): + parsed = self.wappalyzer.parse_patterns('control/userimage\\.html\\;version:1\\;confidence:80') + self.assertEqual(1, len(parsed)) + self.assertEqual('1', getattr(parsed[0], 'version')) + self.assertEqual(80, getattr(parsed[0], 'confidence')) + + def _construct_response(self, url=None, headers=None, html=None): + return FakeUrlopenResponse( + url=url or '', + headers=headers or {}, + html=html or '' + ) + + def test_by_url(self): + wappalyzer = self.get_wappalyzer( + {}, + {'test1': {'url': 'mysite\d.com'}, 'test2': {'url': 'hissite\d.com'}, + 'test3': {'url': ['my', 'his']}}) + resp = self._construct_response(url='http://mysite2.com') + + result = wappalyzer.analyze(response=resp) + + self.assertIn('test1', result) + self.assertIn('test3', result) + + def test_by_html_with_confidence(self): + wappalyzer = self.get_wappalyzer( + {}, + {'test1': {'html': 'body\d\\;confidence:70'}, 'test2': {'html': 'body\w'}}) + resp = self._construct_response(html='body123') + + result = wappalyzer.analyze(response=resp) + + self.assertIn('test1', result) + self.assertEqual(70, result['test1'].get_confidence()) + + def test_by_headers(self): + wappalyzer = self.get_wappalyzer({}, + { + 'test1': { + "headers": {"Server": "debut\\/?([\\d\\.]+)?\\;version:\\1"}, + } + }) + resp = self._construct_response(headers={"Server": 'debut'}) + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + + resp = self._construct_response(headers={"Server": 'debut/12'}) + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + + def test_by_meta(self): + wappalyzer = self.get_wappalyzer({}, + { + 'test1': { + "meta": {"generator": "uCore PHP Framework"}, + }, + 'test2': { + "meta": {"generator2": "0"}, + } + }) + resp = self._construct_response(html="") + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + self.assertNotIn('test2', result) + + resp = self._construct_response(html="") + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + self.assertNotIn('test2', result) + + resp = self._construct_response(html="") + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + self.assertNotIn('test2', result) + + def test_by_scripts(self): + wappalyzer = self.get_wappalyzer( + {}, + {'jquery': { + "script": ["jquery(?:\\-|\\.)([\\d.]*\\d)[^/]*\\.js\\;version:\\1", + "/([\\d.]+)/jquery(\\.min)?\\.js\\;version:\\1", "jquery.*\\.js"], + }}) + resp = self._construct_response( + html='') + + result = wappalyzer.analyze(response=resp) + + self.assertIn('jquery', result) + + + +if __name__ == '__main__': + unittest.main() diff --git a/drivers/python_raw/wappalyzer.py b/drivers/python_raw/wappalyzer.py new file mode 100755 index 000000000..73df758f2 --- /dev/null +++ b/drivers/python_raw/wappalyzer.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import re +import sys +import urllib +from urlparse import urlparse + +try: + import json +except ImportError: + import simplejson as json + + +class Application(object): + def __init__(self, app): + self.app = app + self.confidence = {} + self.detected = False + + def set_detected(self, pattern, type, value, key=None): + self.detected = True + self.confidence[type + ' ' + (key + ' ' if key else '') + pattern.str] = pattern.confidence + + # todo: detect version + + def get_confidence(self): + total = sum(v for v in self.confidence.itervalues()) + return min(100, total) + + +class Wappalyzer(object): + def __init__(self, data=None, datafile_path=None): + data = data or self.load_data(datafile_path) + self.categories = data['categories'] + self.apps = data['apps'] + + def load_data(self, datafile_path=NotImplementedError): + if not datafile_path: + file_dir = os.path.dirname(__file__) + datafile_path = os.path.join(file_dir, 'apps.json') + with open(datafile_path) as f: + data = json.load(f) + return data + + def analyze(self, url=None, response=None): + if not response and not url: + raise ValueError + + if not response: + response = urllib.urlopen(url) + + url = response.url.split('#')[0] + html = response.read() + data = { + 'url': url, + 'html': html, + 'script': re.findall(r']+src=(?:"|\')([^"\']+)', html, re.I | re.M), + 'meta': dict((n.lower(), v) for n, v in + re.findall('' % sys.argv[0]) diff --git a/links.sh b/links.sh index 2c7d0b73c..fd1758dd8 100755 --- a/links.sh +++ b/links.sh @@ -25,3 +25,5 @@ ln -f share/js/wappalyzer.js drivers/php/js ln -f share/apps.json drivers/python ln -f share/js/wappalyzer.js drivers/python/js + +ln -f share/apps.json drivers/python_raw \ No newline at end of file diff --git a/share/apps.json b/share/apps.json index ef0e5c5df..16d1f027b 100644 --- a/share/apps.json +++ b/share/apps.json @@ -88,19 +88,20 @@ "AddThis": { "website": "www.addthis.com", "cats": [ 5 ], - "script": "addthis\\.com/js/(\\d+)?\\;version:\\1", + "script": "addthis\\.com/js/", "env": "^addthis" }, "AdInfinity": { "website": "adinfinity.com.au", "cats": [ 36 ], - "script": [ "adinfinity\\.com\\.au" ] + "script": "adinfinity\\.com\\.au" }, "Adobe ColdFusion": { "website": "adobe.com/products/coldfusion-family.html", "cats": [ 18 ], "url": "\\.cfm(?:$|\\?)", "html": "|]+(?:typolight|contao)\\.css)", + "html": [ "", "]+(?:typolight|contao)\\.css" ], + "meta": { "generator": "^Contao Open Source CMS$" }, "implies": "PHP" }, "Contenido": { @@ -628,7 +658,7 @@ "website": "www.cs-cart.com", "cats": [ 6 ], "env": "^fn_compare_strings$", - "html": " Powered by (?:]+cs-cart\\.com|CS-Cart)", + "html": [ " Powered by (?:]+cs-cart\\.com|CS-Cart)", "(?:\\$|jQuery)\\.runCart\\('\\w'\\)" ], "implies": "PHP" }, "CubeCart": { @@ -647,7 +677,7 @@ "D3": { "website": "d3js.org", "cats": [ 25 ], - "script": "d3(?:\\. v[0-9]+)?(?:\\.min)?\\.js", + "script": "d3(?:\\. v\\d+)?(?:\\.min)?\\.js", "env": "^d3$" }, "Dancer": { @@ -736,15 +766,13 @@ "Django": { "website": "djangoproject.com", "cats": [ 18 ], - "html": "(?:powered by ]+>Django ?([\\d.]+)?|
)\\;version:\\1", + "html": "(?:powered by ]+>Django ?([\\d.]+)?|
)\\;version:\\1", "env": "^__admin_media_prefix__", "implies": "Python" }, "Django CMS": { "website": "django-cms.org", "cats": [ 1 ], - "script": "media/cms/js/csrf\\.js", - "headers": { "Set-Cookie": "django[^;]=" }, "implies": "Django" }, "Dojo": { @@ -777,7 +805,8 @@ "website": "dotnetnuke.com", "cats": [ 1 ], "meta": { "generator": "DotNetNuke" }, - "headers": { "X-Compressed-By": "DotNetNuke", "Set-Cookie": "DotNetNukeAnonymous=" }, + "script": "/js/dnncore\\.js", + "headers": { "DNNOutputCache": ".+", "X-Compressed-By": "DotNetNuke", "Set-Cookie": "DotNetNukeAnonymous=" }, "html": ")", + "html": "(?:]* href=\"templates/gambio/|]content\\.php\\?coID=\\d||" + }, "InstantCMS": { "website": "www.instantcms.ru", "cats": [ 1 ], @@ -1333,7 +1409,7 @@ "Intercom": { "website": "intercom.io", "cats": [ 10 ], - "script": "(api\\.intercom\\.io/api|static\\.intercomcdn\\.com/intercom\\.v1)", + "script": "(?:api\\.intercom\\.io/api|static\\.intercomcdn\\.com/intercom\\.v1)", "env": "^Intercom$" }, "Intershop": { @@ -1412,7 +1488,7 @@ }, "Jekyll": { "website": "jekyllrb.com", - "cats": [ 1, 11], + "cats": [ 1, 11 ], "meta": { "generator": "Jekyll(?:v[\\d.]+)?\\;version:\\1" } }, "Jetty": { @@ -1445,7 +1521,7 @@ "meta": { "generator": "Joomla!(?: ([\\d.]+))?\\;version:\\1" }, "html": "(?:]+id=\"wrapper_r\"|<[^>]+(?:feed|components)/com_|]+class=\"pill)\\;confidence:50", "headers": { "X-Content-Encoded-By": "Joomla! ([\\d.]+)\\;version:\\1" }, - "env": "^(jcomments|Joomla)$", + "env": "^(?:jcomments|Joomla)$", "implies": "PHP" }, "jqPlot": { @@ -1580,7 +1656,7 @@ "website": "www.canon.com", "cats": [ 22 ], "headers": { "Server": "KS_HTTP\\/?([\\d\\.]+)?\\;version:\\1" }, - "implies": [ "Canon"] + "implies": "Canon" }, "LabVIEW": { "website": "ni.com/labview", @@ -1593,6 +1669,11 @@ "headers": { "Set-Cookie": "laravel_session" }, "implies": "PHP" }, + "Leaflet": { + "website": "leafletjs.com", + "cats": [ 35 ], + "script": "leaflet.*\\.js" + }, "LEPTON": { "website": "www.lepton-cms.org", "cats": [ 1 ], @@ -1627,7 +1708,7 @@ "website": "lokeshdhakar.com/projects/lightbox2/", "cats": [ 7, 12 ], "script": "lightbox.*\\.js", - "html": "]*href=\"[^\"]+lightbox(\\.min)?\\.css" + "html": "]*href=\"[^\"]+lightbox(?:\\.min)?\\.css" }, "LightMon Engine": { "website": "en.lightmon.ru", @@ -1645,6 +1726,12 @@ "cats": [ 19 ], "headers": { "generator": "LimeSurvey" } }, + "LinkSmart": { + "website": "linksmart.com", + "cats": [ 36 ], + "script": "^https?://cdn\\.linksmart\\.com/linksmart_([\\d.]+?)(?:\\.min)?\\.js\\;version:\\1", + "env": "^(?:_mb_site_guid$|LS_JSON|LinkSmart(?:_|$))" + }, "LiteSpeed": { "website": "litespeedtech.com", "cats": [ 22 ], @@ -1665,6 +1752,7 @@ "LiveStreet CMS": { "website": "livestreetcms.com", "cats": [ 1 ], + "html": "var LIVESTREET_SECURITY_KEY", "headers": { "X-Powered-By": "LiveStreet CMS" } }, "Lockerz Share": { @@ -1676,9 +1764,14 @@ "Locomotive": { "website": "www.locomotivecms.com", "cats": [ 1 ], - "html": "]*/sites/[a-z0-9]{24}/theme/stylesheets/.*>", + "html": "]*/sites/[a-z\\d]{24}/theme/stylesheets/.*>", "implies": [ "Ruby on Rails", "MongoDB" ] }, + "Lo-dash": { + "website": "www.lodash.com", + "cats": [ 12 ], + "script": "lodash.*\\.js" + }, "Logitech Media Server": { "website": "www.mysqueezebox.com", "cats": [ 22, 38 ], @@ -1697,12 +1790,12 @@ "M.R. Inc Webserver": { "website": "mrincworld.com", "cats": [ 22 ], - "headers": { "Server": "M\\.R\\. Inc Inc Webserver" } + "headers": { "Server": "M\\.R\\. Inc Webserver" } }, "M.R. Inc Wild CMS": { "website": "mrincworld.com", "cats": [ 1, 6 ], - "headers": { "X-Powered-By": "M\\.R\\. Inc Inc Wild CMS" } + "headers": { "X-Powered-By": "M\\.R\\. Inc Wild CMS" } }, "Magento": { "website": "www.magentocommerce.com", @@ -1738,7 +1831,7 @@ "MathJax": { "website": "mathjax.org", "cats": [ 25 ], - "script": "mathjax.js", + "script": "mathjax\\.js", "env": "^MathJax$" }, "math.js": { @@ -1760,7 +1853,7 @@ "MediaTomb": { "website": "mediatomb.cc", "cats": [ 38 ], - "headers": { "Server": "MediaTomb(?:/([0-9.]+))?\\;version:\\1" } + "headers": { "Server": "MediaTomb(?:/([\\d.]+))?\\;version:\\1" } }, "MediaWiki": { "website": "www.mediawiki.org", @@ -1771,14 +1864,14 @@ "Meebo": { "website": "www.meebo.com", "cats": [ 5 ], - "html": "(?: