Refactor NPM driver into class

main
Elbert Alias 7 years ago
parent a0c4618fbc
commit 3bda8533b2

@ -20,8 +20,20 @@ $ npm i wappalyzer
## Run from the command line
```shell
$ node index.js https://www.wappalyzer.com
```
node index.js [url] [options]
```
### Options
```
--debug=0|1 Output debug messages.
--delay=ms Wait for ms milliseconds between requests.
--max-depth=num Don't analyze pages more than num levels deep.
--max-urls=num Exit when num URLs have been analyzed.
--max-wait=ms Wait no more than ms milliseconds for page resources to load.
--recursive=0|1 Follow links on pages (crawler).
--user-agent=str Set the user agent string.
```
@ -29,18 +41,18 @@ $ node index.js https://www.wappalyzer.com
```javascript
const options = {
userAgent: 'Wappalyzer',
maxWait: 3000,
debug: false,
recursive: true,
delay: 500,
maxDepth: 3,
maxUrls: 10,
delay: 500,
maxWait: 3000,
recursive: true,
userAgent: 'Wappalyzer',
};
const wappalyzer = require('wappalyzer')(options);
const wappalyzer = new Wappalyzer('https://www.wappalyzer.com', options);
wappalyzer.analyze('https://www.wappalyzer.com')
wappalyzer.analyze()
.then(json => {
process.stdout.write(JSON.stringify(json, null, 2) + '\n')

@ -1,6 +1,5 @@
'use strict';
const driver = options => {
const Wappalyzer = require('./wappalyzer');
const request = require('request');
const url = require('url');
@ -9,24 +8,42 @@ const driver = options => {
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
return {
analyze: pageUrl => {
const origPageUrl = url.parse(pageUrl);
const analyzedPageUrls = [];
const apps = [];
const wappalyzer = new Wappalyzer();
class Driver {
constructor(pageUrl, options) {
this.options = Object.assign({}, {
debug: false,
delay: 500,
maxDepth: 3,
maxUrls: 10,
maxWait: 3000,
recursive: false,
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
}, options || {});
this.origPageUrl = url.parse(pageUrl);
this.analyzedPageUrls = [];
this.apps = [];
this.wappalyzer = new Wappalyzer();
this.wappalyzer.apps = json.apps;
this.wappalyzer.categories = json.categories;
this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
this.wappalyzer.driver.displayApps = detected => this.displayApps(detected);
}
wappalyzer.apps = json.apps;
wappalyzer.categories = json.categories;
analyze() {
return this.crawl(this.origPageUrl);
}
wappalyzer.driver.log = (message, source, type) => {
if ( Boolean(options.debug) ) {
/**
 * Print a message to the console, but only when the debug option is truthy.
 *
 * @param message - Text to print (last console.log argument).
 * @param source - Label printed inside its own square brackets.
 * @param type - Label printed after "wappalyzer" inside the first brackets.
 */
log(message, source, type) {
if ( Boolean(this.options.debug) ) {
console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
}
};
}
wappalyzer.driver.displayApps = detected => {
displayApps(detected) {
Object.keys(detected).forEach(appName => {
const app = detected[appName];
@ -35,13 +52,13 @@ const driver = options => {
app.props.cats.forEach(id => {
var category = {};
category[id] = wappalyzer.categories[id].name;
category[id] = json.categories[id].name;
categories.push(category)
});
if ( !apps.some(detectedApp => detectedApp.name === app.name) ) {
apps.push({
if ( !this.apps.some(detectedApp => detectedApp.name === app.name) ) {
this.apps.push({
name: app.name,
confidence: app.confidenceTotal.toString(),
version: app.version,
@ -51,43 +68,39 @@ const driver = options => {
});
}
});
};
const browser = new Browser({
userAgent: options.userAgent,
waitDuration: options.maxWait + 'ms',
});
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
}
const fetch = (pageUrl, index, depth) => {
return new Promise(async (resolve, reject) => {
fetch(pageUrl, index, depth) {
return new Promise(async resolve => {
// Return when the URL is a duplicate or maxUrls has been reached
if ( analyzedPageUrls.indexOf(pageUrl.href) !== -1 || analyzedPageUrls.length >= options.maxUrls ) {
if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
return resolve();
}
analyzedPageUrls.push(pageUrl.href);
this.analyzedPageUrls.push(pageUrl.href);
wappalyzer.log('depth: ' + depth + '; delay: ' + ( options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver');
this.wappalyzer.log('depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver');
// Be nice
if ( options.delay ) {
await sleep(options.delay * index);
if ( this.options.delay ) {
await this.sleep(this.options.delay * index);
}
const browser = new Browser({
userAgent: this.options.userAgent,
waitDuration: this.options.maxWait + 'ms',
});
browser.visit(pageUrl.href, error => {
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
wappalyzer.log('No response from server', 'browser', 'error');
this.wappalyzer.log('No response from server', 'browser', 'error');
return resolve();
}
browser.wait()
.catch(error => wappalyzer.log(error.message, 'browser'))
.catch(error => this.wappalyzer.log(error.message, 'browser', 'error'))
.finally(() => {
wappalyzer.driver.document = browser.document;
const headers = {};
browser.resources['0'].response.headers._headers.forEach(header => {
@ -105,42 +118,44 @@ const driver = options => {
.filter(s => s.src)
.map(s => s.src);
wappalyzer.analyze(pageUrl.hostname, pageUrl.href, {
this.wappalyzer.analyze(pageUrl.hostname, pageUrl.href, {
headers,
html,
env: vars,
scripts
});
resolve(browser);
const links = browser.body.getElementsByTagName('a');
resolve(links);
});
});
});
};
}
const crawl = async (pageUrl, index, depth) => {
/**
 * Fetch one page and, when the recursive option is set and depth is below
 * maxDepth, crawl every returned link whose hostname matches the hostname
 * of the original URL. Each followed link has its hash cleared first and
 * is crawled one level deeper.
 *
 * @param pageUrl - URL object to fetch (its href/hostname are used by fetch()).
 * @param index - Position passed through to fetch(); defaults to 1.
 * @param depth - Current crawl depth; defaults to 1.
 * @returns {Promise} Resolves with this.apps; rejects with any caught error.
 */
async crawl(pageUrl, index = 1, depth = 1) {
try {
const browser = await fetch(pageUrl, index, depth);
var links = await this.fetch(pageUrl, index, depth);
if ( options.recursive && depth < options.maxDepth && browser ) {
const links = Array.from(browser.body.getElementsByTagName('a')).filter(link => link.hostname === origPageUrl.hostname);
if ( this.options.recursive && depth < this.options.maxDepth && links ) {
links = Array.from(links).filter(link => link.hostname === this.origPageUrl.hostname);
await Promise.all(links.map(async (link, index) => {
link.hash = '';
return crawl(link, index, depth + 1);
return this.crawl(link, index + 1, depth + 1);
}));
}
return Promise.resolve(apps);
return Promise.resolve(this.apps);
} catch (error) {
return Promise.reject(error);
}
};
}
return crawl(origPageUrl, 1, 1);
sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
};
};
module.exports = driver;
module.exports = Driver;

@ -1,14 +1,6 @@
'use strict';
const options = {
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
maxWait: 3000,
debug: true,
recursive: true,
maxDepth: 3,
maxUrls: 10,
delay: 500,
};
const Wappalyzer = require('./driver');
const args = process.argv.slice(2);
@ -20,6 +12,7 @@ if ( !url ) {
process.exit(1);
}
var options = {};
var arg;
while ( arg = args.shift() ) {
@ -29,13 +22,13 @@ while ( arg = args.shift() ) {
var key = matches[1].replace(/-\w/g, matches => matches[1].toUpperCase());
var value = matches[2];
options.hasOwnProperty(key) && ( options[key] = value );
options[key] = value;
}
}
const wappalyzer = require('./driver')(options);
const wappalyzer = new Wappalyzer(url, options);
wappalyzer.analyze(url)
wappalyzer.analyze()
.then(json => {
process.stdout.write(JSON.stringify(json) + '\n')