diff --git a/drivers/python_raw/__init__.py b/drivers/python_raw/__init__.py
new file mode 100644
index 000000000..df05229a7
--- /dev/null
+++ b/drivers/python_raw/__init__.py
@@ -0,0 +1,2 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
diff --git a/drivers/python_raw/real_test.py b/drivers/python_raw/real_test.py
new file mode 100644
index 000000000..6130f9f5f
--- /dev/null
+++ b/drivers/python_raw/real_test.py
@@ -0,0 +1,156 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+from wappalyzer import Wappalyzer
+
+TESTS = [  # one entry per live site: the URL to analyze and the app names expected in its result
+ {
+ 'url': 'http://www.hardgraft.com',
+ 'apps': ['jQuery', 'Shopify', 'Nginx']
+ },
+ {
+ 'url': 'http://its.bplaced.net',
+ 'apps': ['WordPress', 'jQuery', 'Apache']
+ },
+ {
+ 'url': 'http://www.bodybuilding.com/',
+ 'apps': ['jQuery', 'Optimizely', 'SiteCatalyst', 'Apache Tomcat']
+ },
+ {
+ 'url': 'http://guidedhelp21.weebly.com/',
+ 'apps': ['Weebly', 'Apache', 'Quantcast', 'Google Analytics', 'jQuery']
+ },
+ {
+ 'url': 'http://www.bancadelparque.com/',
+ 'apps': ['Wix', 'Twitter Bootstrap']
+ },
+ {
+ 'url': 'http://joomla.ru/',
+ 'apps': ['Joomla', 'jQuery', 'MooTools', 'Yandex.Metrika', 'LiteSpeed']
+ },
+ {
+ 'url': 'http://demoshop21.e-stile.ru/',
+ 'apps': ['SiteEdit', 'PHP']
+ },
+ {
+ 'url': 'http://umbraco.com',
+ 'apps': ['Umbraco', 'IIS', 'Microsoft ASP.NET']
+ },
+ {
+ 'url': 'http://johnsciacca.webs.com/',
+ 'apps': ['Webs', 'RequireJS', 'Site Meter', 'Modernizr']
+ },
+ {
+ 'url': 'http://www.1c-bitrix.ru/',
+ 'apps': ['1C-Bitrix', 'Yandex.Metrika']
+ },
+ {
+ 'url': 'http://amirocms.com',
+ 'apps': ['Amiro.CMS']
+ },
+ {
+ 'url': 'http://dle-news.ru',
+ 'apps': ['DataLife Engine', 'CloudFlare']
+ },
+ {
+ 'url': 'http://dotnetnuke.com',
+ 'apps': ['DotNetNuke', 'Microsoft ASP.NET']
+ },
+ {
+ 'url': 'http://www.schooldude.com',
+ 'apps': ['DotNetNuke', 'Microsoft ASP.NET']
+ },
+ {
+ 'url': 'http://www.sportsdirect.com/',
+ 'apps': ['DotNetNuke', 'Microsoft ASP.NET']
+ },
+ {
+ 'url': 'http://drupal.org',
+ 'apps': ['Drupal', 'Varnish']
+ },
+ {
+ 'url': 'http://www.komodocms.com/',
+ 'apps': ['Komodo CMS']
+ },
+ {
+ 'url': 'http://livestreetcms.com/',
+ 'apps': ['LiveStreet CMS']
+ },
+ {
+ 'url': 'http://modxcms.com/',
+ 'apps': ['MODx']
+ },
+ {
+ 'url': 'http://modx.ru/',
+ 'apps': ['MODx']
+ },
+ {
+ 'url': 'http://revo.modx.ru/',
+ 'apps': ['MODx']
+ },
+ {
+ 'url': 'http://www.punchbrand.com',
+ 'apps': ['CS Cart']
+ },
+ {
+ 'url': 'http://demo.cs-cart.com/',
+ 'apps': ['CS Cart']
+ },
+ {
+ 'url': 'https://livedemo.installatron.com/1404307206magento/',
+ 'apps': ['Magento']
+ },
+ {
+ 'url': 'http://livedemo.installatron.com/1404300689prestashop/',
+ 'apps': ['Prestashop']
+ },
+ {
+ 'url': 'http://demo.opencart.com/',
+ 'apps': ['OpenCart']
+ },
+ {
+ 'url': 'https://livedemo.installatron.com/1404307206oscommerce/',
+ 'apps': ['osCommerce']
+ },
+ {
+ 'url': 'http://www.ubercartdemo.com/',
+ 'apps': ['Ubercart']
+ },
+ {
+ 'url': 'http://demostore.x-cart.com/',
+ 'apps': ['X-Cart']
+ },
+ {
+ 'url': 'https://livedemo.installatron.com/1404307206zencart/',
+ 'apps': ['Zen Cart']
+ },
+ {
+ 'url': 'http://oreonfray83.wordpress.com',
+ 'apps': ['WordPress.Com']
+ },
+ {
+ 'url': 'http://www.try-phpbb.com/30x/',
+ 'apps': ['phpBB']
+ },
+]
+
+
+def test():
+    """Analyze every site in TESTS over the network and print found, missing and extra apps."""
+    detector = Wappalyzer(datafile_path='../../share/apps.json')
+    for site in TESTS:
+        print 'testing %s ...' % site['url']
+        detected = detector.analyze(site['url'])
+        for expected in site['apps']:
+            match = detected.pop(expected, None)
+            if not match:
+                print '\t%s\t- NOT FOUND' % (expected)
+                return  # NOTE(review): ends the whole run at the first missing app -- intended?
+            print '\t%s\t- ok\tconfidence=%d' % (expected, match.get_confidence())
+        if detected:
+            # Entries still present were detected but are not listed for this site.
+            print '\tUNEXPECTED APPS:'
+            for name, extra in detected.iteritems():
+                print '\t\t%s\t- ok\tconfidence=%d' % (name, extra.get_confidence())
+
+if __name__ == '__main__':  # run the live-site check only when executed as a script
+ test()
\ No newline at end of file
diff --git a/drivers/python_raw/tests.py b/drivers/python_raw/tests.py
new file mode 100644
index 000000000..51b8120b8
--- /dev/null
+++ b/drivers/python_raw/tests.py
@@ -0,0 +1,131 @@
+import re
+import unittest
+import wappalyzer
+
+
+class FakeUrlopenResponse(object):
+    """Canned response exposing `url`, `read()` and `info().dict` for offline tests."""
+    def __init__(self, url, html, headers):
+        self.url, self.html, self.headers = url, html, headers
+
+    def read(self):
+        """Return the canned HTML body."""
+        return self.html
+
+    def info(self):
+        """Return an object whose `dict` property yields this response's headers."""
+        response = self
+
+        class _Info:
+            dict = property(lambda _: response.headers)  # read from the response on each access
+
+        info = _Info()
+        return info
+
+
+class WappalyzerCustomTestCase(unittest.TestCase):
+    """Offline checks of pattern parsing and detection against canned responses."""
+    def setUp(self):
+        # Detector with no categories or apps, used by the parsing tests.
+        self.wappalyzer = wappalyzer.Wappalyzer({'categories': [], 'apps': []})
+
+    def get_wappalyzer(self, categories, apps):
+        """Build a detector from in-memory category and app definitions."""
+        definitions = {'categories': categories, 'apps': apps}
+        return wappalyzer.Wappalyzer(definitions)
+
+    def test_parse_simple(self):
+        # A bare pattern parses to one entry whose regex attribute has a search method.
+        patterns = self.wappalyzer.parse_patterns('control/userimage\\.html')
+        self.assertEqual(1, len(patterns))
+        self.assertTrue(hasattr(patterns[0].regex, 'search'))
+
+    def test_parse_confidence_version(self):
+        # The version and confidence tags are exposed as attributes of the parsed pattern.
+        spec = 'control/userimage\\.html\\;version:1\\;confidence:80'
+        patterns = self.wappalyzer.parse_patterns(spec)
+        self.assertEqual(1, len(patterns))
+        self.assertEqual('1', patterns[0].version)
+        self.assertEqual(80, patterns[0].confidence)
+
+    def _construct_response(self, url=None, headers=None, html=None):
+        """Wrap the given parts in a FakeUrlopenResponse, defaulting to empty values."""
+        return FakeUrlopenResponse(url=url or '', headers=headers or {}, html=html or '')
+
+    def test_by_url(self):
+        # Apps whose url pattern matches the response URL are expected in the result.
+        apps = {
+            'test1': {'url': 'mysite\d.com'},
+            'test2': {'url': 'hissite\d.com'},
+            'test3': {'url': ['my', 'his']},
+        }
+        detector = self.get_wappalyzer({}, apps)
+        response = self._construct_response(url='http://mysite2.com')
+
+        result = detector.analyze(response=response)
+
+        for name in ('test1', 'test3'):
+            self.assertIn(name, result)
+
+    def test_by_html_with_confidence(self):
+        # An html match is expected to report the confidence given in its pattern.
+        apps = {
+            'test1': {'html': 'body\d\\;confidence:70'},
+            'test2': {'html': 'body\w'},
+        }
+        detector = self.get_wappalyzer({}, apps)
+        response = self._construct_response(html='body123')
+
+        result = detector.analyze(response=response)
+
+        self.assertIn('test1', result)
+        self.assertEqual(70, result['test1'].get_confidence())
+
+    def test_by_headers(self):
+        # The Server header should match both without and with a version suffix.
+        apps = {
+            'test1': {
+                "headers": {"Server": "debut\\/?([\\d\\.]+)?\\;version:\\1"},
+            },
+        }
+        detector = self.get_wappalyzer({}, apps)
+
+        for server in ('debut', 'debut/12'):
+            response = self._construct_response(headers={"Server": server})
+            result = detector.analyze(response=response)
+            self.assertIn('test1', result)
+
+    def test_by_meta(self):
+        # Only the app whose generator meta pattern matches is expected in the result.
+        apps = {
+            'test1': {"meta": {"generator": "uCore PHP Framework"}},
+            'test2': {"meta": {"generator2": "0"}},
+        }
+        detector = self.get_wappalyzer({}, apps)
+        # NOTE(review): all three fixtures are empty strings, yet a meta-only app is
+        # expected to match -- the markup they should hold looks lost; confirm upstream.
+        for html in ("", "", ""):
+            response = self._construct_response(html=html)
+            result = detector.analyze(response=response)
+            self.assertIn('test1', result)
+            self.assertNotIn('test2', result)
+
+    def test_by_scripts(self):
+        # A script-source pattern is expected to identify the jquery app.
+        script_patterns = [
+            "jquery(?:\\-|\\.)([\\d.]*\\d)[^/]*\\.js\\;version:\\1",
+            "/([\\d.]+)/jquery(\\.min)?\\.js\\;version:\\1",
+            "jquery.*\\.js",
+        ]
+        detector = self.get_wappalyzer({}, {'jquery': {"script": script_patterns}})
+        # NOTE(review): the html fixture is empty here too; confirm the intended markup.
+        response = self._construct_response(html='')
+
+        result = detector.analyze(response=response)
+
+        self.assertIn('jquery', result)
+
+
+
+if __name__ == '__main__':  # run the unit tests only when executed as a script
+ unittest.main()
diff --git a/drivers/python_raw/wappalyzer.py b/drivers/python_raw/wappalyzer.py
new file mode 100755
index 000000000..73df758f2
--- /dev/null
+++ b/drivers/python_raw/wappalyzer.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import re
+import sys
+import urllib
+from urlparse import urlparse
+
+try:
+ import json
+except ImportError:
+ import simplejson as json
+
+
+class Application(object):
+    """Detection state for one app: per-match confidences plus a detected flag."""
+    def __init__(self, app):
+        self.app, self.confidence, self.detected = app, {}, False
+
+    def set_detected(self, pattern, type, value, key=None):
+        """Mark the app detected and record `pattern.confidence`; `value` is not used here."""
+        self.detected = True
+        label = ' '.join([type] + ([key] if key else []) + [pattern.str])  # "<type> [<key> ]<pattern>"
+        self.confidence[label] = pattern.confidence
+        # todo: detect version
+
+    def get_confidence(self):
+        """Return the sum of all recorded confidences, capped at 100."""
+        return min(100, sum(self.confidence.values()))
+
+
+class Wappalyzer(object):
+ def __init__(self, data=None, datafile_path=None):
+     """Take definitions from `data` if truthy, otherwise from load_data(datafile_path)."""
+     definitions = data if data else self.load_data(datafile_path)
+     self.categories, self.apps = definitions['categories'], definitions['apps']
+
+ def load_data(self, datafile_path=None):
+     """Return parsed JSON from `datafile_path` (falsy: apps.json next to this module)."""
+     # Default is None so a bare call reaches the fallback below; a truthy default would skip it.
+     if not datafile_path:
+         datafile_path = os.path.join(os.path.dirname(__file__), 'apps.json')
+     with open(datafile_path) as f:
+         return json.load(f)
+
+ def analyze(self, url=None, response=None):
+ if not response and not url:
+ raise ValueError
+
+ if not response:
+ response = urllib.urlopen(url)
+
+ url = response.url.split('#')[0]
+ html = response.read()
+ data = {
+ 'url': url,
+ 'html': html,
+ 'script': re.findall(r'