Merge pull request #715 from chorsley/master

Fix for BuySellAds regex, make python_raw driver into Python package
main
Elbert Alias 10 years ago
commit 2b6ad4be2f

@ -0,0 +1,23 @@
from setuptools import setup
# Packaging metadata for the Wappalyzer python_raw driver.
# Distributes the single top-level module named in `py_modules`.
setup(
    name="wappalyzer",
    version="0.0.1",
    description="Python package for python_raw driver in Wappalyzer bundle",
    author="smant",
    author_email="TBA",
    url="https://github.com/ElbertF/Wappalyzer/tree/master/drivers/python_raw",
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Programming Language :: Python :: 2.7',
        'Topic :: Internet :: WWW/HTTP',
    ],
    py_modules=['wappalyzer'],
    # No runtime dependencies are declared.
    install_requires=[
    ],
    # `python setup.py test` collects tests through nose, so nose must be
    # listed in tests_require; otherwise the test command cannot import
    # `nose.collector` on a machine where nose is not already installed.
    test_suite='nose.collector',
    tests_require=[
        'nose',
    ],
)

@ -14,6 +14,7 @@ except ImportError:
class Application(object): class Application(object):
def __init__(self, app): def __init__(self, app):
self.app = app self.app = app
self.confidence = {} self.confidence = {}
@ -21,7 +22,8 @@ class Application(object):
def set_detected(self, pattern, type, value, key=None): def set_detected(self, pattern, type, value, key=None):
self.detected = True self.detected = True
self.confidence[type + ' ' + (key + ' ' if key else '') + pattern.str] = pattern.confidence self.confidence[
type + ' ' + (key + ' ' if key else '') + pattern.str] = pattern.confidence
# todo: detect version # todo: detect version
@ -31,6 +33,7 @@ class Application(object):
class Wappalyzer(object): class Wappalyzer(object):
def __init__(self, data=None, datafile_path=None): def __init__(self, data=None, datafile_path=None):
data = data or self.load_data(datafile_path) data = data or self.load_data(datafile_path)
self.categories = data['categories'] self.categories = data['categories']
@ -76,18 +79,22 @@ class Wappalyzer(object):
if detection_type in ['url', 'html']: if detection_type in ['url', 'html']:
for pattern in self.parse_patterns(patterns): for pattern in self.parse_patterns(patterns):
if pattern.regex.search(data[detection_type]): if pattern.regex.search(data[detection_type]):
application.set_detected(pattern, detection_type, data[detection_type]) application.set_detected(
pattern, detection_type, data[detection_type])
elif detection_type in ['meta', 'headers']: elif detection_type in ['meta', 'headers']:
for hm_name, hm_pattern in patterns.iteritems(): for hm_name, hm_pattern in patterns.iteritems():
for pattern in self.parse_patterns(hm_pattern): for pattern in self.parse_patterns(hm_pattern):
value = data[detection_type].get(hm_name.lower()) value = data[detection_type].get(
hm_name.lower())
if value and pattern.regex.search(value): if value and pattern.regex.search(value):
application.set_detected(pattern, detection_type, value, hm_name) application.set_detected(
pattern, detection_type, value, hm_name)
elif detection_type in ['script']: elif detection_type in ['script']:
for script in data[detection_type]: for script in data[detection_type]:
for pattern in self.parse_patterns(patterns): for pattern in self.parse_patterns(patterns):
if pattern.regex.search(script): if pattern.regex.search(script):
application.set_detected(pattern, detection_type, script) application.set_detected(
pattern, detection_type, script)
elif detection_type in ['website', 'excludes', 'cats', 'implies', 'env']: elif detection_type in ['website', 'excludes', 'cats', 'implies', 'env']:
pass pass
else: else:
@ -101,12 +108,12 @@ class Wappalyzer(object):
return detected_apps return detected_apps
class Pattern: class Pattern:
def __init__(self, str): def __init__(self, str):
self.str = str self.str = str
self.regex = re.compile(str, re.I) self.regex = re.compile(str, re.I)
self.confidence = 100 self.confidence = 100
def parse_patterns(self, patterns): def parse_patterns(self, patterns):
if isinstance(patterns, basestring): if isinstance(patterns, basestring):
patterns = [patterns] patterns = [patterns]

@ -461,7 +461,7 @@
"website": "buysellads.com", "website": "buysellads.com",
"cats": [ 36 ], "cats": [ 36 ],
"script": "^https?://s\\d\\.buysellads\\.com/", "script": "^https?://s\\d\\.buysellads\\.com/",
"html": "<script[^>]*>(?:(?!<\\/script>)(?:.|\\s))+?bsa\\.src\\s*=\\s*[\"'][^'\"]+s\\d\\.buysellads\\.com", "html": "<script[^>]*>[^<]+?bsa.src\\s*=\\s*['\"](?:https?:)?\\/{2}\\w\\d\\.buysellads\\.com\\/[\\w\\d\\/]+?bsa\\.js['\"]",
"env": "^_bsa" "env": "^_bsa"
}, },
"Canon": { "Canon": {

Loading…
Cancel
Save