From 3bda8533b228419bf3e04ac80fc2e088d4560c07 Mon Sep 17 00:00:00 2001 From: Elbert Alias Date: Sun, 19 Nov 2017 09:32:50 +1100 Subject: [PATCH] Refactor NPM driver into class --- src/drivers/npm/README.md | 28 +++-- src/drivers/npm/driver.js | 243 ++++++++++++++++++++------------------ src/drivers/npm/index.js | 17 +-- 3 files changed, 154 insertions(+), 134 deletions(-) diff --git a/src/drivers/npm/README.md b/src/drivers/npm/README.md index e3fc682dd..3875fdbf9 100644 --- a/src/drivers/npm/README.md +++ b/src/drivers/npm/README.md @@ -20,8 +20,20 @@ $ npm i wappalyzer ## Run from the command line -```shell -$ node index.js https://www.wappalyzer.com +``` +node index.js [url] [options] +``` + +### Options + +``` + --debug=0|1 Output debug messages. + --delay=ms Wait for ms milliseconds between requests. + --max-depth=num Don't analyze pages more than num levels deep. + --max-urls=num Exit when num URLs have been analyzed. + --max-wait=ms Wait no more than ms milliseconds for page resources to load. + --recursive=0|1 Follow links on pages (crawler). + --user-agent=str Set the user agent string. ``` @@ -29,18 +41,18 @@ $ node index.js https://www.wappalyzer.com ```javascript const options = { - userAgent: 'Wappalyzer', - maxWait: 3000, debug: false, - recursive: true, + delay: 500, maxDepth: 3, maxUrls: 10, - delay: 500, + maxWait: 3000, + recursive: true, + userAgent: 'Wappalyzer', }; -const wappalyzer = require('wappalyzer')(options); +const wappalyzer = new Wappalyzer('https://www.wappalyzer.com', options); -wappalyzer.analyze('https://www.wappalyzer.com') +wappalyzer.analyze() .then(json => { process.stdout.write(JSON.stringify(json, null, 2) + '\n') diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 95325b011..2101fee92 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -1,146 +1,161 @@ 'use strict'; -const driver = options => { - const Wappalyzer = require('./wappalyzer'); - const request = require('request'); - const url = require('url'); - const fs = require('fs'); - const Browser = require('zombie'); +const Wappalyzer = require('./wappalyzer'); +const request = require('request'); +const url = require('url'); +const fs = require('fs'); +const Browser = require('zombie'); + +const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json')); + +class Driver { + constructor(pageUrl, options) { + this.options = Object.assign({}, { + debug: false, + delay: 500, + maxDepth: 3, + maxUrls: 10, + maxWait: 3000, + recursive: false, + userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)', + }, options || {}); + + this.origPageUrl = url.parse(pageUrl); + this.analyzedPageUrls = []; + this.apps = []; + + this.wappalyzer = new Wappalyzer(); + + this.wappalyzer.apps = json.apps; + this.wappalyzer.categories = json.categories; + + this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type); + this.wappalyzer.driver.displayApps = detected => this.displayApps(detected); + } + + analyze() { + return this.crawl(this.origPageUrl); + } + + log(message, source, type) { + if ( Boolean(this.options.debug) ) { + console.log('[wappalyzer ' + type + ']', '[' + source + ']', message); + } + } - const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json')); + displayApps(detected) { + Object.keys(detected).forEach(appName => { + const app = detected[appName]; - return { - analyze: pageUrl => { - const origPageUrl = url.parse(pageUrl); - const analyzedPageUrls = []; - const apps = []; + var categories = []; - const wappalyzer = new Wappalyzer(); + app.props.cats.forEach(id => { + var category = {}; - wappalyzer.apps = json.apps; - wappalyzer.categories = json.categories; + category[id] = json.categories[id].name; - wappalyzer.driver.log = (message, source, type) => { - if ( Boolean(options.debug) ) { - console.log('[wappalyzer ' + type + ']', '[' + source + ']', message); - } - }; + categories.push(category) + }); - wappalyzer.driver.displayApps = detected => { - Object.keys(detected).forEach(appName => { - const app = detected[appName]; + if ( !this.apps.some(detectedApp => detectedApp.name === app.name) ) { + this.apps.push({ + name: app.name, + confidence: app.confidenceTotal.toString(), + version: app.version, + icon: app.props.icon || 'default.svg', + website: app.props.website, + categories + }); + } + }); + } - var categories = []; + fetch(pageUrl, index, depth) { + return new Promise(async resolve => { + // Return when the URL is a duplicate or maxUrls has been reached + if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) { + return resolve(); + } - app.props.cats.forEach(id => { - var category = {}; + this.analyzedPageUrls.push(pageUrl.href); - category[id] = wappalyzer.categories[id].name; + this.wappalyzer.log('depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver'); - categories.push(category) - }); - - if ( !apps.some(detectedApp => detectedApp.name === app.name) ) { - apps.push({ - name: app.name, - confidence: app.confidenceTotal.toString(), - version: app.version, - icon: app.props.icon || 'default.svg', - website: app.props.website, - categories - }); - } - }); - }; + // Be nice + if ( this.options.delay ) { + await this.sleep(this.options.delay * index); + } const browser = new Browser({ - userAgent: options.userAgent, - waitDuration: options.maxWait + 'ms', + userAgent: this.options.userAgent, + waitDuration: this.options.maxWait + 'ms', }); - const sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); - - const fetch = (pageUrl, index, depth) => { - return new Promise(async (resolve, reject) => { - // Return when the URL is a duplicate or maxUrls has been reached - if ( analyzedPageUrls.indexOf(pageUrl.href) !== -1 || analyzedPageUrls.length >= options.maxUrls ) { - return resolve(); - } - - analyzedPageUrls.push(pageUrl.href); - - wappalyzer.log('depth: ' + depth + '; delay: ' + ( options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver'); + browser.visit(pageUrl.href, error => { + if ( !browser.resources['0'] || !browser.resources['0'].response ) { + this.wappalyzer.log('No response from server', 'browser', 'error'); - // Be nice - if ( options.delay ) { - await sleep(options.delay * index); - } - - browser.visit(pageUrl.href, error => { - if ( !browser.resources['0'] || !browser.resources['0'].response ) { - wappalyzer.log('No response from server', 'browser', 'error'); - - return resolve(); - } - - browser.wait() - .catch(error => wappalyzer.log(error.message, 'browser')) - .finally(() => { - wappalyzer.driver.document = browser.document; + return resolve(); + } - const headers = {}; + browser.wait() + .catch(error => this.wappalyzer.log(error.message, 'browser', 'error')) + .finally(() => { + const headers = {}; - browser.resources['0'].response.headers._headers.forEach(header => { - if ( !headers[header[0]] ){ - headers[header[0]] = []; - } + browser.resources['0'].response.headers._headers.forEach(header => { + if ( !headers[header[0]] ){ + headers[header[0]] = []; + } - headers[header[0]].push(header[1]); - }); + headers[header[0]].push(header[1]); + }); - const vars = Object.getOwnPropertyNames(browser.window); - const html = browser.html(); - const scripts = Array.prototype.slice - .apply(browser.document.scripts) - .filter(s => s.src) - .map(s => s.src); + const vars = Object.getOwnPropertyNames(browser.window); + const html = browser.html(); + const scripts = Array.prototype.slice + .apply(browser.document.scripts) + .filter(s => s.src) + .map(s => s.src); + + this.wappalyzer.analyze(pageUrl.hostname, pageUrl.href, { + headers, + html, + env: vars, + scripts + }); - wappalyzer.analyze(pageUrl.hostname, pageUrl.href, { - headers, - html, - env: vars, - scripts - }); + const links = browser.body.getElementsByTagName('a'); - resolve(browser); - }); + resolve(links); }); - }); - }; + }); + }); + } - const crawl = async (pageUrl, index, depth) => { - try { - const browser = await fetch(pageUrl, index, depth); + async crawl(pageUrl, index = 1, depth = 1) { + try { + var links = await this.fetch(pageUrl, index, depth); - if ( options.recursive && depth < options.maxDepth && browser ) { - const links = Array.from(browser.body.getElementsByTagName('a')).filter(link => link.hostname === origPageUrl.hostname); + if ( this.options.recursive && depth < this.options.maxDepth && links ) { + links = Array.from(links).filter(link => link.hostname === this.origPageUrl.hostname); - await Promise.all(links.map(async (link, index) => { - link.hash = ''; + await Promise.all(links.map(async (link, index) => { + link.hash = ''; - return crawl(link, index, depth + 1); - })); - } + return this.crawl(link, index + 1, depth + 1); + })); + } - return Promise.resolve(apps); - } catch (error) { - return Promise.reject(error); - } - }; - - return crawl(origPageUrl, 1, 1); + return Promise.resolve(this.apps); + } catch (error) { + return Promise.reject(error); } - }; + } + + sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + } }; -module.exports = driver; +module.exports = Driver; diff --git a/src/drivers/npm/index.js b/src/drivers/npm/index.js index 494fa9f43..c8c254850 100644 --- a/src/drivers/npm/index.js +++ b/src/drivers/npm/index.js @@ -1,14 +1,6 @@ 'use strict'; -const options = { - userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)', - maxWait: 3000, - debug: true, - recursive: true, - maxDepth: 3, - maxUrls: 10, - delay: 500, -}; +const Wappalyzer = require('./driver'); const args = process.argv.slice(2); @@ -20,6 +12,7 @@ if ( !url ) { process.exit(1); } +var options = {}; var arg; while ( arg = args.shift() ) { @@ -29,13 +22,13 @@ while ( arg = args.shift() ) { var key = matches[1].replace(/-\w/g, matches => matches[1].toUpperCase()); var value = matches[2]; - options.hasOwnProperty(key) && ( options[key] = value ); + options[key] = value; } } -const wappalyzer = require('./driver')(options); +const wappalyzer = new Wappalyzer(url, options); -wappalyzer.analyze(url) +wappalyzer.analyze() .then(json => { process.stdout.write(JSON.stringify(json) + '\n')