/** * Wappalyzer v5 * * Created by Elbert Alias * * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt */ const validation = { hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/, hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/, }; /** * Enclose string in array */ function asArray(value) { return value instanceof Array ? value : [value]; } /** * */ function asyncForEach(iterable, iterator) { return Promise.all((iterable || []) .map(item => new Promise(resolve => setTimeout(() => resolve(iterator(item)), 1)))); } /** * Mark application as detected, set confidence and version */ function addDetected(app, pattern, type, value, key) { app.detected = true; // Set confidence level app.confidence[`${type} ${key ? `${key} ` : ''}${pattern.regex}`] = pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10); // Detect version number if (pattern.version) { const versions = []; const matches = pattern.regex.exec(value); let { version } = pattern; if (matches) { matches.forEach((match, i) => { // Parse ternary operator const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version); if (ternary && ternary.length === 3) { version = version.replace(ternary[0], match ? ternary[1] : ternary[2]); } // Replace back references version = version.trim().replace(new RegExp(`\\\\${i}`, 'g'), match || ''); }); if (version && versions.indexOf(version) === -1) { versions.push(version); } if (versions.length) { // Use the longest detected version number app.version = versions.reduce((a, b) => (a.length > b.length ? a : b)); } } } } function resolveExcludes(apps, detected) { const excludes = []; const detectedApps = Object.assign({}, apps, detected); // Exclude app in detected apps only Object.keys(detectedApps).forEach((appName) => { const app = detectedApps[appName]; if (app.props.excludes) { asArray(app.props.excludes).forEach((excluded) => { excludes.push(excluded); }); } }); // Remove excluded applications Object.keys(apps).forEach((appName) => { if (excludes.indexOf(appName) > -1) { delete apps[appName]; } }); } class Application { constructor(name, props, detected) { this.confidence = {}; this.confidenceTotal = 0; this.detected = Boolean(detected); this.excludes = []; this.name = name; this.props = props; this.version = ''; } /** * Calculate confidence total */ getConfidence() { let total = 0; Object.keys(this.confidence).forEach((id) => { total += this.confidence[id]; }); this.confidenceTotal = Math.min(total, 100); return this.confidenceTotal; } } class Wappalyzer { constructor() { this.apps = {}; this.categories = {}; this.driver = {}; this.jsPatterns = {}; this.detected = {}; this.hostnameCache = {}; this.adCache = []; this.config = { websiteURL: 'https://www.wappalyzer.com/', twitterURL: 'https://twitter.com/Wappalyzer', githubURL: 'https://github.com/AliasIO/Wappalyzer', }; } /** * Log messages to console */ log(message, source, type) { if (this.driver.log) { this.driver.log(message, source || '', type || 'debug'); } } analyze(url, data, context) { const apps = {}; const promises = []; const startTime = new Date(); const { scripts, cookies, headers, js, } = data; let { html } = data; if (this.detected[url.canonical] === undefined) { this.detected[url.canonical] = {}; } const metaTags = []; // Additional information let language = null; if (html) { if (typeof html !== 'string') { html = ''; } let matches = data.html.match(new RegExp(']*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i')); language = matches && matches.length ? matches[1] : null; // Meta tags const regex = /]+>/ig; do { matches = regex.exec(html); if (!matches) { break; } metaTags.push(matches[0]); } while (matches); } Object.keys(this.apps).forEach((appName) => { apps[appName] = this.detected[url.canonical] && this.detected[url.canonical][appName] ? this.detected[url.canonical][appName] : new Application(appName, this.apps[appName]); const app = apps[appName]; promises.push(this.analyzeUrl(app, url)); if (html) { promises.push(this.analyzeHtml(app, html)); promises.push(this.analyzeMeta(app, metaTags)); } if (scripts) { promises.push(this.analyzeScripts(app, scripts)); } if (cookies) { promises.push(this.analyzeCookies(app, cookies)); } if (headers) { promises.push(this.analyzeHeaders(app, headers)); } }); if (js) { Object.keys(js).forEach((appName) => { if (typeof js[appName] !== 'function') { promises.push(this.analyzeJs(apps[appName], js[appName])); } }); } return new Promise(async (resolve) => { await Promise.all(promises); Object.keys(apps).forEach((appName) => { const app = apps[appName]; if (!app.detected || !app.getConfidence()) { delete apps[app.name]; } }); resolveExcludes(apps, this.detected[url]); this.resolveImplies(apps, url.canonical); this.cacheDetectedApps(apps, url.canonical); this.trackDetectedApps(apps, url, language); this.log(`Processing ${Object.keys(data).join(', ')} took ${((new Date() - startTime) / 1000).toFixed(2)}s (${url.hostname})`, 'core'); if (Object.keys(apps).length) { this.log(`Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, 'core'); } this.driver.displayApps(this.detected[url.canonical], { language }, context); return resolve(); }); } /** * Cache detected ads */ cacheDetectedAds(ad) { this.adCache.push(ad); } /** * */ robotsTxtAllows(url) { return new Promise(async (resolve, reject) => { const parsed = this.parseUrl(url); if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { return reject(); } const robotsTxt = await this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:'); if (robotsTxt.some(disallowedPath => parsed.pathname.indexOf(disallowedPath) === 0)) { return reject(); } return resolve(); }); } /** * Parse a URL */ parseUrl(url) { const a = this.driver.document.createElement('a'); a.href = url; a.canonical = `${a.protocol}//${a.host}${a.pathname}`; return a; } /** * */ static parseRobotsTxt(robotsTxt) { const disallow = []; let userAgent; robotsTxt.split('\n').forEach((line) => { let matches = /^User-agent:\s*(.+)$/i.exec(line.trim()); if (matches) { userAgent = matches[1].toLowerCase(); } else if (userAgent === '*' || userAgent === 'wappalyzer') { matches = /^Disallow:\s*(.+)$/i.exec(line.trim()); if (matches) { disallow.push(matches[1]); } } }); return disallow; } /** * */ ping() { if (Object.keys(this.hostnameCache).length > 25) { this.driver.ping(this.hostnameCache); this.hostnameCache = {}; } if (this.adCache.length > 50) { this.driver.ping({}, this.adCache); this.adCache = []; } } /** * Parse apps.json patterns */ parsePatterns(patterns) { if (!patterns) { return []; } let parsed = {}; // Convert string to object containing array containing string if (typeof patterns === 'string' || patterns instanceof Array) { patterns = { main: asArray(patterns), }; } Object.keys(patterns).forEach((key) => { parsed[key] = []; asArray(patterns[key]).forEach((pattern) => { const attrs = {}; pattern.split('\\;').forEach((attr, i) => { if (i) { // Key value pairs attr = attr.split(':'); if (attr.length > 1) { attrs[attr.shift()] = attr.join(':'); } } else { attrs.string = attr; try { attrs.regex = new RegExp(attr.replace('/', '\/'), 'i'); // Escape slashes in regular expression } catch (error) { attrs.regex = new RegExp(); this.log(`${error.message}: ${attr}`, 'error', 'core'); } } }); parsed[key].push(attrs); }); }); // Convert back to array if the original pattern list was an array (or string) if ('main' in parsed) { parsed = parsed.main; } return parsed; } /** * Parse JavaScript patterns */ parseJsPatterns() { Object.keys(this.apps).forEach((appName) => { if (this.apps[appName].js) { this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js); } }); } resolveImplies(apps, url) { let checkImplies = true; const resolve = (appName) => { const app = apps[appName]; if (app && app.props.implies) { asArray(app.props.implies).forEach((implied) => { [implied] = this.parsePatterns(implied); if (!this.apps[implied.string]) { this.log(`Implied application ${implied.string} does not exist`, 'core', 'warn'); return; } if (!(implied.string in apps)) { apps[implied.string] = this.detected[url] && this.detected[url][implied.string] ? this.detected[url][implied.string] : new Application(implied.string, this.apps[implied.string], true); checkImplies = true; } // Apply app confidence to implied app Object.keys(app.confidence).forEach((id) => { apps[implied.string].confidence[`${id} implied by ${appName}`] = app.confidence[id] * (implied.confidence === undefined ? 1 : implied.confidence / 100); }); }); } }; // Implied applications // Run several passes as implied apps may imply other apps while (checkImplies) { checkImplies = false; Object.keys(apps).forEach(resolve); } } /** * Cache detected applications */ cacheDetectedApps(apps, url) { Object.keys(apps).forEach((appName) => { const app = apps[appName]; // Per URL this.detected[url][appName] = app; Object.keys(app.confidence) .forEach((id) => { this.detected[url][appName].confidence[id] = app.confidence[id]; }); }); if (this.driver.ping instanceof Function) { this.ping(); } } /** * Track detected applications */ trackDetectedApps(apps, url, language) { if (!(this.driver.ping instanceof Function)) { return; } const hostname = `${url.protocol}//${url.hostname}`; Object.keys(apps).forEach((appName) => { const app = apps[appName]; if (this.detected[url.canonical][appName].getConfidence() >= 100) { if ( validation.hostname.test(url.hostname) && !validation.hostnameBlacklist.test(url.hostname) ) { if (!(hostname in this.hostnameCache)) { this.hostnameCache[hostname] = { applications: {}, meta: {}, }; } if (!(appName in this.hostnameCache[hostname].applications)) { this.hostnameCache[hostname].applications[appName] = { hits: 0, }; } this.hostnameCache[hostname].applications[appName].hits += 1; if (apps[appName].version) { this.hostnameCache[hostname].applications[appName].version = app.version; } } } }); if (hostname in this.hostnameCache) { this.hostnameCache[hostname].meta.language = language; } this.ping(); } /** * Analyze URL */ analyzeUrl(app, url) { const patterns = this.parsePatterns(app.props.url); if (!patterns.length) { return Promise.resolve(); } return asyncForEach(patterns, (pattern) => { if (pattern.regex.test(url.canonical)) { addDetected(app, pattern, 'url', url.canonical); } }); } /** * Analyze HTML */ analyzeHtml(app, html) { const patterns = this.parsePatterns(app.props.html); if (!patterns.length) { return Promise.resolve(); } return asyncForEach(patterns, (pattern) => { if (pattern.regex.test(html)) { addDetected(app, pattern, 'html', html); } }); } /** * Analyze script tag */ analyzeScripts(app, scripts) { const patterns = this.parsePatterns(app.props.script); if (!patterns.length) { return Promise.resolve(); } return asyncForEach(patterns, (pattern) => { scripts.forEach((uri) => { if (pattern.regex.test(uri)) { addDetected(app, pattern, 'script', uri); } }); }); } /** * Analyze meta tag */ analyzeMeta(app, metaTags) { const patterns = this.parsePatterns(app.props.meta); const promises = []; if (!app.props.meta) { return Promise.resolve(); } metaTags.forEach((match) => { Object.keys(patterns).forEach((meta) => { const r = new RegExp(`(?:name|property)=["']${meta}["']`, 'i'); if (r.test(match)) { const content = match.match(/content=("|')([^"']+)("|')/i); promises.push(asyncForEach(patterns[meta], (pattern) => { if (content && content.length === 4 && pattern.regex.test(content[2])) { addDetected(app, pattern, 'meta', content[2], meta); } })); } }); }); return Promise.all(promises); } /** * Analyze response headers */ analyzeHeaders(app, headers) { const patterns = this.parsePatterns(app.props.headers); const promises = []; Object.keys(patterns).forEach((headerName) => { if (typeof patterns[headerName] !== 'function') { promises.push(asyncForEach(patterns[headerName], (pattern) => { headerName = headerName.toLowerCase(); if (headerName in headers) { headers[headerName].forEach((headerValue) => { if (pattern.regex.test(headerValue)) { addDetected(app, pattern, 'headers', headerValue, headerName); } }); } })); } }); return promises ? Promise.all(promises) : Promise.resolve(); } /** * Analyze cookies */ analyzeCookies(app, cookies) { const patterns = this.parsePatterns(app.props.cookies); const promises = []; Object.keys(patterns).forEach((cookieName) => { if (typeof patterns[cookieName] !== 'function') { const cookieNameLower = cookieName.toLowerCase(); promises.push(asyncForEach(patterns[cookieName], (pattern) => { const cookie = cookies.find(_cookie => _cookie.name.toLowerCase() === cookieNameLower); if (cookie && pattern.regex.test(cookie.value)) { addDetected(app, pattern, 'cookies', cookie.value, cookieName); } })); } }); return promises ? Promise.all(promises) : Promise.resolve(); } /** * Analyze JavaScript variables */ analyzeJs(app, results) { const promises = []; Object.keys(results).forEach((string) => { if (typeof results[string] !== 'function') { promises.push(asyncForEach(Object.keys(results[string]), (index) => { const pattern = this.jsPatterns[app.name][string][index]; const value = results[string][index]; if (pattern && pattern.regex.test(value)) { addDetected(app, pattern, 'js', value, string); } })); } }); return promises ? Promise.all(promises) : Promise.resolve(); } } if (typeof module === 'object') { module.exports = Wappalyzer; }