diff --git a/drivers/python_raw/__init__.py b/drivers/python_raw/__init__.py new file mode 100644 index 000000000..df05229a7 --- /dev/null +++ b/drivers/python_raw/__init__.py @@ -0,0 +1,2 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- diff --git a/drivers/python_raw/real_test.py b/drivers/python_raw/real_test.py new file mode 100644 index 000000000..6130f9f5f --- /dev/null +++ b/drivers/python_raw/real_test.py @@ -0,0 +1,156 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +from wappalyzer import Wappalyzer + +TESTS = [ + { + 'url': 'http://www.hardgraft.com', + 'apps': ['jQuery', 'Shopify', 'Nginx'] + }, + { + 'url': 'http://its.bplaced.net', + 'apps': ['WordPress', 'jQuery', 'Apache'] + }, + { + 'url': 'http://www.bodybuilding.com/', + 'apps': ['jQuery', 'Optimizely', 'SiteCatalyst', 'Apache Tomcat'] + }, + { + 'url': 'http://guidedhelp21.weebly.com/', + 'apps': ['Weebly', 'Apache', 'Quantcast', 'Google Analytics', 'jQuery'] + }, + { + 'url': 'http://www.bancadelparque.com/', + 'apps': ['Wix', 'Twitter Bootstrap'] + }, + { + 'url': 'http://joomla.ru/', + 'apps': ['Joomla', 'jQuery', 'MooTools', 'Yandex.Metrika', 'LiteSpeed'] + }, + { + 'url': 'http://demoshop21.e-stile.ru/', + 'apps': ['SiteEdit', 'PHP'] + }, + { + 'url': 'http://umbraco.com', + 'apps': ['Umbraco', 'IIS', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://johnsciacca.webs.com/', + 'apps': ['Webs', 'RequireJS', 'Site Meter', 'Modernizr'] + }, + { + 'url': 'http://www.1c-bitrix.ru/', + 'apps': ['1C-Bitrix', 'Yandex.Metrika'] + }, + { + 'url': 'http://amirocms.com', + 'apps': ['Amiro.CMS'] + }, + { + 'url': 'http://dle-news.ru', + 'apps': ['DataLife Engine', 'CloudFlare'] + }, + { + 'url': 'http://dotnetnuke.com', + 'apps': ['DotNetNuke', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://www.schooldude.com', + 'apps': ['DotNetNuke', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://www.sportsdirect.com/', + 'apps': ['DotNetNuke', 'Microsoft ASP.NET'] + }, + { + 'url': 'http://drupal.org', + 'apps': ['Drupal', 'Varnish'] + }, + { + 'url': 'http://www.komodocms.com/', + 'apps': ['Komodo CMS'] + }, + { + 'url': 'http://livestreetcms.com/', + 'apps': ['LiveStreet CMS'] + }, + { + 'url': 'http://modxcms.com/', + 'apps': ['MODx'] + }, + { + 'url': 'http://modx.ru/', + 'apps': ['MODx'] + }, + { + 'url': 'http://revo.modx.ru/', + 'apps': ['MODx'] + }, + { + 'url': 'http://www.punchbrand.com', + 'apps': ['CS Cart'] + }, + { + 'url': 'http://demo.cs-cart.com/', + 'apps': ['CS Cart'] + }, + { + 'url': 'https://livedemo.installatron.com/1404307206magento/', + 'apps': ['Magento'] + }, + { + 'url': 'http://livedemo.installatron.com/1404300689prestashop/', + 'apps': ['Prestashop'] + }, + { + 'url': 'http://demo.opencart.com/', + 'apps': ['OpenCart'] + }, + { + 'url': 'https://livedemo.installatron.com/1404307206oscommerce/', + 'apps': ['osCommerce'] + }, + { + 'url': 'http://www.ubercartdemo.com/', + 'apps': ['Ubercart'] + }, + { + 'url': 'http://demostore.x-cart.com/', + 'apps': ['X-Cart'] + }, + { + 'url': 'https://livedemo.installatron.com/1404307206zencart/', + 'apps': ['Zen Cart'] + }, + { + 'url': 'http://oreonfray83.wordpress.com', + 'apps': ['WordPress.Com'] + }, + { + 'url': 'http://www.try-phpbb.com/30x/', + 'apps': ['phpBB'] + }, +] + + +def test(): + wappalyzer = Wappalyzer(datafile_path='../../share/apps.json') + + for site in TESTS: + print 'testing %s ...' % site['url'] + result = wappalyzer.analyze(site['url']) + for app in site['apps']: + found = result.pop(app, None) + if found: + print '\t%s\t- ok\tconfidence=%d' % (app, found.get_confidence()) + else: + print '\t%s\t- NOT FOUND' % (app) + return + if result: + print '\tUNEXPECTED APPS:' + for app_name, app in result.iteritems(): + print '\t\t%s\t- ok\tconfidence=%d' % (app_name, app.get_confidence()) + +if __name__ == '__main__': + test() \ No newline at end of file diff --git a/drivers/python_raw/tests.py b/drivers/python_raw/tests.py new file mode 100644 index 000000000..51b8120b8 --- /dev/null +++ b/drivers/python_raw/tests.py @@ -0,0 +1,131 @@ +import re +import unittest +import wappalyzer + + +class FakeUrlopenResponse(object): + def __init__(self, url, html, headers): + self.url = url + self.html = html + self.headers = headers + + def read(self): + return self.html + + def info(self): + _cls = self + + class _Info: + @property + def dict(self): + return _cls.headers + + return _Info() + + +class WappalyzerCustomTestCase(unittest.TestCase): + def setUp(self): + self.wappalyzer = wappalyzer.Wappalyzer({'categories':[],'apps':[]}) + + def get_wappalyzer(self, categories, apps): + return wappalyzer.Wappalyzer({'categories': categories, 'apps': apps}) + + def test_parse_simple(self): + parsed = self.wappalyzer.parse_patterns('control/userimage\\.html') + self.assertEqual(1, len(parsed)) + self.assertTrue(hasattr(parsed[0].regex, 'search')) + + def test_parse_confidence_version(self): + parsed = self.wappalyzer.parse_patterns('control/userimage\\.html\\;version:1\\;confidence:80') + self.assertEqual(1, len(parsed)) + self.assertEqual('1', getattr(parsed[0], 'version')) + self.assertEqual(80, getattr(parsed[0], 'confidence')) + + def _construct_response(self, url=None, headers=None, html=None): + return FakeUrlopenResponse( + url=url or '', + headers=headers or {}, + html=html or '' + ) + + def test_by_url(self): + wappalyzer = self.get_wappalyzer( + {}, + {'test1': {'url': 'mysite\d.com'}, 'test2': {'url': 'hissite\d.com'}, + 'test3': {'url': ['my', 'his']}}) + resp = self._construct_response(url='http://mysite2.com') + + result = wappalyzer.analyze(response=resp) + + self.assertIn('test1', result) + self.assertIn('test3', result) + + def test_by_html_with_confidence(self): + wappalyzer = self.get_wappalyzer( + {}, + {'test1': {'html': 'body\d\\;confidence:70'}, 'test2': {'html': 'body\w'}}) + resp = self._construct_response(html='body123') + + result = wappalyzer.analyze(response=resp) + + self.assertIn('test1', result) + self.assertEqual(70, result['test1'].get_confidence()) + + def test_by_headers(self): + wappalyzer = self.get_wappalyzer({}, + { + 'test1': { + "headers": {"Server": "debut\\/?([\\d\\.]+)?\\;version:\\1"}, + } + }) + resp = self._construct_response(headers={"Server": 'debut'}) + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + + resp = self._construct_response(headers={"Server": 'debut/12'}) + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + + def test_by_meta(self): + wappalyzer = self.get_wappalyzer({}, + { + 'test1': { + "meta": {"generator": "uCore PHP Framework"}, + }, + 'test2': { + "meta": {"generator2": "0"}, + } + }) + resp = self._construct_response(html="") + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + self.assertNotIn('test2', result) + + resp = self._construct_response(html="") + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + self.assertNotIn('test2', result) + + resp = self._construct_response(html="") + result = wappalyzer.analyze(response=resp) + self.assertIn('test1', result) + self.assertNotIn('test2', result) + + def test_by_scripts(self): + wappalyzer = self.get_wappalyzer( + {}, + {'jquery': { + "script": ["jquery(?:\\-|\\.)([\\d.]*\\d)[^/]*\\.js\\;version:\\1", + "/([\\d.]+)/jquery(\\.min)?\\.js\\;version:\\1", "jquery.*\\.js"], + }}) + resp = self._construct_response( + html='') + + result = wappalyzer.analyze(response=resp) + + self.assertIn('jquery', result) + + + +if __name__ == '__main__': + unittest.main() diff --git a/drivers/python_raw/wappalyzer.py b/drivers/python_raw/wappalyzer.py new file mode 100755 index 000000000..73df758f2 --- /dev/null +++ b/drivers/python_raw/wappalyzer.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import re +import sys +import urllib +from urlparse import urlparse + +try: + import json +except ImportError: + import simplejson as json + + +class Application(object): + def __init__(self, app): + self.app = app + self.confidence = {} + self.detected = False + + def set_detected(self, pattern, type, value, key=None): + self.detected = True + self.confidence[type + ' ' + (key + ' ' if key else '') + pattern.str] = pattern.confidence + + # todo: detect version + + def get_confidence(self): + total = sum(v for v in self.confidence.itervalues()) + return min(100, total) + + +class Wappalyzer(object): + def __init__(self, data=None, datafile_path=None): + data = data or self.load_data(datafile_path) + self.categories = data['categories'] + self.apps = data['apps'] + + def load_data(self, datafile_path=NotImplementedError): + if not datafile_path: + file_dir = os.path.dirname(__file__) + datafile_path = os.path.join(file_dir, 'apps.json') + with open(datafile_path) as f: + data = json.load(f) + return data + + def analyze(self, url=None, response=None): + if not response and not url: + raise ValueError + + if not response: + response = urllib.urlopen(url) + + url = response.url.split('#')[0] + html = response.read() + data = { + 'url': url, + 'html': html, + 'script': re.findall(r']+src=(?:"|\')([^"\']+)', html, re.I | re.M), + 'meta': dict((n.lower(), v) for n, v in + re.findall('' % sys.argv[0]) diff --git a/links.sh b/links.sh index 2c7d0b73c..fd1758dd8 100755 --- a/links.sh +++ b/links.sh @@ -25,3 +25,5 @@ ln -f share/js/wappalyzer.js drivers/php/js ln -f share/apps.json drivers/python ln -f share/js/wappalyzer.js drivers/python/js + +ln -f share/apps.json drivers/python_raw \ No newline at end of file diff --git a/share/apps.json b/share/apps.json index 30b2a6f89..5b8c6eb40 100644 --- a/share/apps.json +++ b/share/apps.json @@ -628,7 +628,7 @@ "website": "www.cs-cart.com", "cats": [ 6 ], "env": "^fn_compare_strings$", - "html": " Powered by (?:]+cs-cart\\.com|CS-Cart)", + "html": [" Powered by (?:]+cs-cart\\.com|CS-Cart)", "(?:\\$|jQuery)\\.runCart\\('\\w'\\)"], "implies": "PHP" }, "CubeCart": { @@ -743,8 +743,6 @@ "Django CMS": { "website": "django-cms.org", "cats": [ 1 ], - "script": "media/cms/js/csrf\\.js", - "headers": { "Set-Cookie": "django[^;]=" }, "implies": "Django" }, "Dojo": { @@ -777,7 +775,8 @@ "website": "dotnetnuke.com", "cats": [ 1 ], "meta": { "generator": "DotNetNuke" }, - "headers": { "X-Compressed-By": "DotNetNuke", "Set-Cookie": "DotNetNukeAnonymous=" }, + "script":["/js/dnncore\\.js"], + "headers": { "DNNOutputCache":".+", "X-Compressed-By": "DotNetNuke", "Set-Cookie": "DotNetNukeAnonymous=" }, "html": "