diff --git a/src/drivers/npm/README.md b/src/drivers/npm/README.md index 23227a613..e3fc682dd 100644 --- a/src/drivers/npm/README.md +++ b/src/drivers/npm/README.md @@ -31,7 +31,11 @@ $ node index.js https://www.wappalyzer.com const options = { userAgent: 'Wappalyzer', maxWait: 3000, - debug: false + debug: false, + recursive: true, + maxDepth: 3, + maxUrls: 10, + delay: 500, }; const wappalyzer = require('wappalyzer')(options); diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 487200a2d..95325b011 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -3,45 +3,44 @@ const driver = options => { const Wappalyzer = require('./wappalyzer'); const request = require('request'); + const url = require('url'); const fs = require('fs'); const Browser = require('zombie'); const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json')); return { - analyze: url => { + analyze: pageUrl => { + const origPageUrl = url.parse(pageUrl); + const analyzedPageUrls = []; + const apps = []; + const wappalyzer = new Wappalyzer(); wappalyzer.apps = json.apps; wappalyzer.categories = json.categories; - return new Promise((resolve, reject) => { - wappalyzer.driver.log = (message, source, type) => { - if ( type === 'error' ) { - return reject(message); - } - - if ( Boolean(options.debug) ) { - console.log('[wappalyzer ' + type + ']', '[' + source + ']', message); - } - }; - - wappalyzer.driver.displayApps = detected => { - var apps = []; + wappalyzer.driver.log = (message, source, type) => { + if ( Boolean(options.debug) ) { + console.log('[wappalyzer ' + type + ']', '[' + source + ']', message); + } + }; - Object.keys(detected).forEach(appName => { - const app = detected[appName]; + wappalyzer.driver.displayApps = detected => { + Object.keys(detected).forEach(appName => { + const app = detected[appName]; - var categories = []; + var categories = []; - app.props.cats.forEach(id => { - var category = {}; + app.props.cats.forEach(id => { + var category = {}; - category[id] = wappalyzer.categories[id].name; + category[id] = wappalyzer.categories[id].name; - categories.push(category) - }); + categories.push(category) + }); + if ( !apps.some(detectedApp => detectedApp.name === app.name) ) { apps.push({ name: app.name, confidence: app.confidenceTotal.toString(), @@ -50,52 +49,96 @@ const driver = options => { website: app.props.website, categories }); - }); + } + }); + }; - resolve(apps); - }; + const browser = new Browser({ + userAgent: options.userAgent, + waitDuration: options.maxWait + 'ms', + }); - const browser = new Browser({ - userAgent: options.userAgent - }); + const sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); - browser.visit(url, error => { - if ( !browser.resources['0'] || !browser.resources['0'].response ) { - return wappalyzer.log('No response from server', 'driver', 'error'); + const fetch = (pageUrl, index, depth) => { + return new Promise(async (resolve, reject) => { + // Return when the URL is a duplicate or maxUrls has been reached + if ( analyzedPageUrls.indexOf(pageUrl.href) !== -1 || analyzedPageUrls.length >= options.maxUrls ) { + return resolve(); } - browser.wait(options.maxWait) - .catch(error => wappalyzer.log(error.message, 'browser')) - .finally(() => { - wappalyzer.driver.document = browser.document; + analyzedPageUrls.push(pageUrl.href); - const headers = {}; + wappalyzer.log('depth: ' + depth + '; delay: ' + ( options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver'); - browser.resources['0'].response.headers._headers.forEach(header => { - if ( !headers[header[0]] ){ - headers[header[0]] = []; - } - headers[header[0]].push(header[1]); - }); + // Be nice + if ( options.delay ) { + await sleep(options.delay * index); + } + + browser.visit(pageUrl.href, error => { + if ( !browser.resources['0'] || !browser.resources['0'].response ) { + wappalyzer.log('No response from server', 'browser', 'error'); - const vars = Object.getOwnPropertyNames(browser.window); - const html = browser.html(); - const scripts = Array.prototype.slice - .apply(browser.document.scripts) - .filter(s => s.src) - .map(s => s.src); + return resolve(); + } - const hostname = wappalyzer.parseUrl(url).hostname; + browser.wait() + .catch(error => wappalyzer.log(error.message, 'browser')) + .finally(() => { + wappalyzer.driver.document = browser.document; - wappalyzer.analyze(hostname, url, { - headers, - html, - env: vars, - scripts + const headers = {}; + + browser.resources['0'].response.headers._headers.forEach(header => { + if ( !headers[header[0]] ){ + headers[header[0]] = []; + } + + headers[header[0]].push(header[1]); + }); + + const vars = Object.getOwnPropertyNames(browser.window); + const html = browser.html(); + const scripts = Array.prototype.slice + .apply(browser.document.scripts) + .filter(s => s.src) + .map(s => s.src); + + wappalyzer.analyze(pageUrl.hostname, pageUrl.href, { + headers, + html, + env: vars, + scripts + }); + + resolve(browser); }); - }); + }); }); - }); + }; + + const crawl = async (pageUrl, index, depth) => { + try { + const browser = await fetch(pageUrl, index, depth); + + if ( options.recursive && depth < options.maxDepth && browser ) { + const links = Array.from(browser.body.getElementsByTagName('a')).filter(link => link.hostname === origPageUrl.hostname); + + await Promise.all(links.map(async (link, index) => { + link.hash = ''; + + return crawl(link, index, depth + 1); + })); + } + + return Promise.resolve(apps); + } catch (error) { + return Promise.reject(error); + } + }; + + return crawl(origPageUrl, 1, 1); } }; }; diff --git a/src/drivers/npm/index.js b/src/drivers/npm/index.js index 31f4dc954..494fa9f43 100644 --- a/src/drivers/npm/index.js +++ b/src/drivers/npm/index.js @@ -1,9 +1,13 @@ 'use strict'; const options = { - userAgent: null, + userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)', maxWait: 3000, - debug: false + debug: true, + recursive: true, + maxDepth: 3, + maxUrls: 10, + delay: 500, }; const args = process.argv.slice(2);