Refactor NPM driver into class

main
Elbert Alias 7 years ago
parent a0c4618fbc
commit 3bda8533b2

@ -20,8 +20,20 @@ $ npm i wappalyzer
## Run from the command line ## Run from the command line
```shell ```
$ node index.js https://www.wappalyzer.com node index.js [url] [options]
```
### Options
```
--debug=0|1 Output debug messages.
--delay=ms Wait for ms milliseconds between requests.
--max-depth=num Don't analyze pages more than num levels deep.
--max-urls=num Exit when num URLs have been analyzed.
--max-wait=ms Wait no more than ms milliseconds for page resources to load.
--recursive=0|1 Follow links on pages (crawler).
--user-agent=str Set the user agent string.
``` ```
@ -29,18 +41,18 @@ $ node index.js https://www.wappalyzer.com
```javascript ```javascript
const options = { const options = {
userAgent: 'Wappalyzer',
maxWait: 3000,
debug: false, debug: false,
recursive: true, delay: 500,
maxDepth: 3, maxDepth: 3,
maxUrls: 10, maxUrls: 10,
delay: 500, maxWait: 3000,
recursive: true,
userAgent: 'Wappalyzer',
}; };
const wappalyzer = require('wappalyzer')(options); const wappalyzer = new Wappalyzer('https://www.wappalyzer.com', options);
wappalyzer.analyze('https://www.wappalyzer.com') wappalyzer.analyze()
.then(json => { .then(json => {
process.stdout.write(JSON.stringify(json, null, 2) + '\n') process.stdout.write(JSON.stringify(json, null, 2) + '\n')

@ -1,32 +1,49 @@
'use strict'; 'use strict';
const driver = options => { const Wappalyzer = require('./wappalyzer');
const Wappalyzer = require('./wappalyzer'); const request = require('request');
const request = require('request'); const url = require('url');
const url = require('url'); const fs = require('fs');
const fs = require('fs'); const Browser = require('zombie');
const Browser = require('zombie');
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
class Driver {
return { constructor(pageUrl, options) {
analyze: pageUrl => { this.options = Object.assign({}, {
const origPageUrl = url.parse(pageUrl); debug: false,
const analyzedPageUrls = []; delay: 500,
const apps = []; maxDepth: 3,
maxUrls: 10,
const wappalyzer = new Wappalyzer(); maxWait: 3000,
recursive: false,
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
}, options || {});
this.origPageUrl = url.parse(pageUrl);
this.analyzedPageUrls = [];
this.apps = [];
this.wappalyzer = new Wappalyzer();
this.wappalyzer.apps = json.apps;
this.wappalyzer.categories = json.categories;
this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
this.wappalyzer.driver.displayApps = detected => this.displayApps(detected);
}
wappalyzer.apps = json.apps; analyze() {
wappalyzer.categories = json.categories; return this.crawl(this.origPageUrl);
}
wappalyzer.driver.log = (message, source, type) => { log(message, source, type) {
if ( Boolean(options.debug) ) { if ( Boolean(this.options.debug) ) {
console.log('[wappalyzer ' + type + ']', '[' + source + ']', message); console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
} }
}; }
wappalyzer.driver.displayApps = detected => { displayApps(detected) {
Object.keys(detected).forEach(appName => { Object.keys(detected).forEach(appName => {
const app = detected[appName]; const app = detected[appName];
@ -35,13 +52,13 @@ const driver = options => {
app.props.cats.forEach(id => { app.props.cats.forEach(id => {
var category = {}; var category = {};
category[id] = wappalyzer.categories[id].name; category[id] = json.categories[id].name;
categories.push(category) categories.push(category)
}); });
if ( !apps.some(detectedApp => detectedApp.name === app.name) ) { if ( !this.apps.some(detectedApp => detectedApp.name === app.name) ) {
apps.push({ this.apps.push({
name: app.name, name: app.name,
confidence: app.confidenceTotal.toString(), confidence: app.confidenceTotal.toString(),
version: app.version, version: app.version,
@ -51,43 +68,39 @@ const driver = options => {
}); });
} }
}); });
}; }
const browser = new Browser({
userAgent: options.userAgent,
waitDuration: options.maxWait + 'ms',
});
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
const fetch = (pageUrl, index, depth) => { fetch(pageUrl, index, depth) {
return new Promise(async (resolve, reject) => { return new Promise(async resolve => {
// Return when the URL is a duplicate or maxUrls has been reached // Return when the URL is a duplicate or maxUrls has been reached
if ( analyzedPageUrls.indexOf(pageUrl.href) !== -1 || analyzedPageUrls.length >= options.maxUrls ) { if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
return resolve(); return resolve();
} }
analyzedPageUrls.push(pageUrl.href); this.analyzedPageUrls.push(pageUrl.href);
wappalyzer.log('depth: ' + depth + '; delay: ' + ( options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver'); this.wappalyzer.log('depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver');
// Be nice // Be nice
if ( options.delay ) { if ( this.options.delay ) {
await sleep(options.delay * index); await this.sleep(this.options.delay * index);
} }
const browser = new Browser({
userAgent: this.options.userAgent,
waitDuration: this.options.maxWait + 'ms',
});
browser.visit(pageUrl.href, error => { browser.visit(pageUrl.href, error => {
if ( !browser.resources['0'] || !browser.resources['0'].response ) { if ( !browser.resources['0'] || !browser.resources['0'].response ) {
wappalyzer.log('No response from server', 'browser', 'error'); this.wappalyzer.log('No response from server', 'browser', 'error');
return resolve(); return resolve();
} }
browser.wait() browser.wait()
.catch(error => wappalyzer.log(error.message, 'browser')) .catch(error => this.wappalyzer.log(error.message, 'browser', 'error'))
.finally(() => { .finally(() => {
wappalyzer.driver.document = browser.document;
const headers = {}; const headers = {};
browser.resources['0'].response.headers._headers.forEach(header => { browser.resources['0'].response.headers._headers.forEach(header => {
@ -105,42 +118,44 @@ const driver = options => {
.filter(s => s.src) .filter(s => s.src)
.map(s => s.src); .map(s => s.src);
wappalyzer.analyze(pageUrl.hostname, pageUrl.href, { this.wappalyzer.analyze(pageUrl.hostname, pageUrl.href, {
headers, headers,
html, html,
env: vars, env: vars,
scripts scripts
}); });
resolve(browser); const links = browser.body.getElementsByTagName('a');
resolve(links);
}); });
}); });
}); });
}; }
const crawl = async (pageUrl, index, depth) => { async crawl(pageUrl, index = 1, depth = 1) {
try { try {
const browser = await fetch(pageUrl, index, depth); var links = await this.fetch(pageUrl, index, depth);
if ( options.recursive && depth < options.maxDepth && browser ) { if ( this.options.recursive && depth < this.options.maxDepth && links ) {
const links = Array.from(browser.body.getElementsByTagName('a')).filter(link => link.hostname === origPageUrl.hostname); links = Array.from(links).filter(link => link.hostname === this.origPageUrl.hostname);
await Promise.all(links.map(async (link, index) => { await Promise.all(links.map(async (link, index) => {
link.hash = ''; link.hash = '';
return crawl(link, index, depth + 1); return this.crawl(link, index + 1, depth + 1);
})); }));
} }
return Promise.resolve(apps); return Promise.resolve(this.apps);
} catch (error) { } catch (error) {
return Promise.reject(error); return Promise.reject(error);
} }
}; }
return crawl(origPageUrl, 1, 1); sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
} }
};
}; };
module.exports = driver; module.exports = Driver;

@ -1,14 +1,6 @@
'use strict'; 'use strict';
const options = { const Wappalyzer = require('./driver');
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
maxWait: 3000,
debug: true,
recursive: true,
maxDepth: 3,
maxUrls: 10,
delay: 500,
};
const args = process.argv.slice(2); const args = process.argv.slice(2);
@ -20,6 +12,7 @@ if ( !url ) {
process.exit(1); process.exit(1);
} }
var options = {};
var arg; var arg;
while ( arg = args.shift() ) { while ( arg = args.shift() ) {
@ -29,13 +22,13 @@ while ( arg = args.shift() ) {
var key = matches[1].replace(/-\w/g, matches => matches[1].toUpperCase()); var key = matches[1].replace(/-\w/g, matches => matches[1].toUpperCase());
var value = matches[2]; var value = matches[2];
options.hasOwnProperty(key) && ( options[key] = value ); options[key] = value;
} }
} }
const wappalyzer = require('./driver')(options); const wappalyzer = new Wappalyzer(url, options);
wappalyzer.analyze(url) wappalyzer.analyze()
.then(json => { .then(json => {
process.stdout.write(JSON.stringify(json) + '\n') process.stdout.write(JSON.stringify(json) + '\n')