Patch summary (free text ahead of the first "diff --git" header is skipped
by git-apply and patch):
  * drivers/python_raw/setup.py: new setuptools script packaging the
    python_raw driver module; nose is declared in tests_require because
    test_suite points at nose.collector.
  * drivers/python_raw/wappalyzer.py: whitespace-only cleanup (blank line
    after class headers, over-long calls wrapped, one surplus blank line
    removed).
  * share/apps.json: BuySellAds "html" pattern rewritten to match the
    bsa.js loader URL; the dot in "bsa.src" stays escaped.
  NOTE(review): context indentation was reconstructed; confirm against the
  target tree before applying.

diff --git a/drivers/python_raw/setup.py b/drivers/python_raw/setup.py
new file mode 100644
index 000000000..362afe309
--- /dev/null
+++ b/drivers/python_raw/setup.py
@@ -0,0 +1,24 @@
+from setuptools import setup
+
+setup(
+    name="wappalyzer",
+    version="0.0.1",
+    description="Python package for python_raw driver in Wappalyzer bundle",
+    author="smant",
+    author_email="TBA",
+    url="https://github.com/ElbertF/Wappalyzer/tree/master/drivers/python_raw",
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+        'Programming Language :: Python :: 2.7',
+        'Topic :: Internet :: WWW/HTTP',
+    ],
+    py_modules=['wappalyzer'],
+    install_requires=[
+    ],
+    test_suite='nose.collector',
+    tests_require=[
+        'nose',
+    ]
+)
diff --git a/drivers/python_raw/wappalyzer.py b/drivers/python_raw/wappalyzer.py
index 73df758f2..d4bb34760 100755
--- a/drivers/python_raw/wappalyzer.py
+++ b/drivers/python_raw/wappalyzer.py
@@ -14,6 +14,7 @@ except ImportError:
 
 
 class Application(object):
+
     def __init__(self, app):
         self.app = app
         self.confidence = {}
@@ -21,7 +22,8 @@ class Application(object):
 
     def set_detected(self, pattern, type, value, key=None):
         self.detected = True
-        self.confidence[type + ' ' + (key + ' ' if key else '') + pattern.str] = pattern.confidence
+        self.confidence[
+            type + ' ' + (key + ' ' if key else '') + pattern.str] = pattern.confidence
 
         # todo: detect version
 
@@ -31,6 +33,7 @@ class Application(object):
 
 
 class Wappalyzer(object):
+
     def __init__(self, data=None, datafile_path=None):
         data = data or self.load_data(datafile_path)
         self.categories = data['categories']
@@ -76,18 +79,22 @@ class Wappalyzer(object):
                     if detection_type in ['url', 'html']:
                         for pattern in self.parse_patterns(patterns):
                             if pattern.regex.search(data[detection_type]):
-                                application.set_detected(pattern, detection_type, data[detection_type])
+                                application.set_detected(
+                                    pattern, detection_type, data[detection_type])
                     elif detection_type in ['meta', 'headers']:
                         for hm_name, hm_pattern in patterns.iteritems():
                             for pattern in self.parse_patterns(hm_pattern):
-                                value = data[detection_type].get(hm_name.lower())
+                                value = data[detection_type].get(
+                                    hm_name.lower())
                                 if value and pattern.regex.search(value):
-                                    application.set_detected(pattern, detection_type, value, hm_name)
+                                    application.set_detected(
+                                        pattern, detection_type, value, hm_name)
                     elif detection_type in ['script']:
                         for script in data[detection_type]:
                             for pattern in self.parse_patterns(patterns):
                                 if pattern.regex.search(script):
-                                    application.set_detected(pattern, detection_type, script)
+                                    application.set_detected(
+                                        pattern, detection_type, script)
                     elif detection_type in ['website', 'excludes', 'cats', 'implies', 'env']:
                         pass
                     else:
@@ -101,12 +108,12 @@ class Wappalyzer(object):
         return detected_apps
 
     class Pattern:
+
         def __init__(self, str):
             self.str = str
             self.regex = re.compile(str, re.I)
             self.confidence = 100
 
-
     def parse_patterns(self, patterns):
         if isinstance(patterns, basestring):
             patterns = [patterns]
diff --git a/share/apps.json b/share/apps.json
index 1e261eedf..fcda70a37 100644
--- a/share/apps.json
+++ b/share/apps.json
@@ -461,7 +461,7 @@
 			"website": "buysellads.com",
 			"cats": [ 36 ],
 			"script": "^https?://s\\d\\.buysellads\\.com/",
-			"html": "<script[^>]*>(?:(?!<\\/script>)(?:.|\\s))+?bsa\\.src\\s*=\\s*[\"'][^'\"]+s\\d\\.buysellads\\.com",
+			"html": "<script[^>]*>[^<]+?bsa\\.src\\s*=\\s*['\"](?:https?:)?\\/{2}\\w\\d\\.buysellads\\.com\\/[\\w\\d\\/]+?bsa\\.js['\"]",
 			"env": "^_bsa"
 		},
 		"Canon": {