#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Application confidence bookkeeping and Wappalyzer data loading."""
import os
import re
import sys
import urllib

try:
    from urlparse import urlparse
except ImportError:  # Python 3 moved urlparse into urllib.parse
    from urllib.parse import urlparse

try:
    import json
except ImportError:
    import simplejson as json


class Application(object):
    """Collects the evidence recorded for one application.

    Attributes set by ``__init__``:
        app: the value handed to the constructor, stored unchanged.
        confidence: dict mapping an evidence label to the confidence of the
            pattern that produced it.
        detected: ``False`` until ``set_detected`` is called.
    """

    def __init__(self, app):
        self.app = app
        self.confidence = {}
        self.detected = False

    def set_detected(self, pattern, type, value, key=None):
        """Mark the application as detected and store the pattern's confidence.

        Args:
            pattern: object exposing ``str`` and ``confidence`` attributes.
            type: text used as the first part of the evidence label. The name
                shadows the builtin but is kept so keyword callers still work.
            value: accepted but not used by this method (see the TODO below).
            key: optional text inserted between ``type`` and ``pattern.str``.
        """
        self.detected = True
        # Label layout: "<type> [<key> ]<pattern.str>". Recording the same
        # label twice overwrites the earlier confidence.
        label = type + ' ' + (key + ' ' if key else '') + pattern.str
        self.confidence[label] = pattern.confidence
        # TODO: detect version

    def get_confidence(self):
        """Return the sum of all recorded confidences, capped at 100."""
        # values() behaves the same on Python 2 and 3; itervalues() is
        # Python 2 only.
        total = sum(self.confidence.values())
        return min(100, total)


class Wappalyzer(object):
    """Holds the ``categories`` and ``apps`` tables used for analysis."""

    def __init__(self, data=None, datafile_path=None):
        """Use ``data`` when it is truthy, otherwise load it from disk.

        Args:
            data: dict with ``'categories'`` and ``'apps'`` keys.
            datafile_path: forwarded to ``load_data`` when ``data`` is falsy.

        Raises:
            KeyError: if the data lacks ``'categories'`` or ``'apps'``.
        """
        data = data or self.load_data(datafile_path)
        self.categories = data['categories']
        self.apps = data['apps']

    def load_data(self, datafile_path=None):
        """Parse and return the JSON document at ``datafile_path``.

        Args:
            datafile_path: path of the JSON file. When falsy, ``apps.json``
                in this module's directory is read instead.

        Returns:
            The decoded JSON content.
        """
        # The default used to be the NotImplementedError class, which is
        # truthy, so a bare load_data() call skipped this fallback and passed
        # an exception class to open().
        if not datafile_path:
            file_dir = os.path.dirname(__file__)
            datafile_path = os.path.join(file_dir, 'apps.json')
        with open(datafile_path) as f:
            data = json.load(f)
        return data

    # NOTE(review): the rest of this class is truncated in the available
    # source. `analyze` breaks off inside its `data` dict literal, and its
    # regex literals appear to have lost characters (the first one starts
    # with an unbalanced `]`). The surviving text is reproduced verbatim
    # below, commented out, instead of being reconstructed by guesswork.
    # Restore the method from a complete copy of the file.
    #
    # def analyze(self, url=None, response=None):
    #     if not response and not url:
    #         raise ValueError
    #     if not response:
    #         response = urllib.urlopen(url)
    #     url = response.url.split('#')[0]
    #     html = response.read()
    #     data = {
    #         'url': url,
    #         'html': html,
    #         'script': re.findall(r']+src=(?:"|\')([^"\']+)', html, re.I | re.M),
    #         'meta': dict((n.lower(), v) for n, v in re.findall('' % sys.argv[0])