Refactor NPM driver into class

main
Elbert Alias 7 years ago
parent a0c4618fbc
commit 3bda8533b2

@ -20,8 +20,20 @@ $ npm i wappalyzer
## Run from the command line ## Run from the command line
```shell ```
$ node index.js https://www.wappalyzer.com node index.js [url] [options]
```
### Options
```
--debug=0|1 Output debug messages.
--delay=ms Wait for ms milliseconds between requests.
--max-depth=num Don't analyze pages more than num levels deep.
--max-urls=num Exit when num URLs have been analyzed.
--max-wait=ms Wait no more than ms milliseconds for page resources to load.
--recursive=0|1 Follow links on pages (crawler).
--user-agent=str Set the user agent string.
``` ```
@ -29,18 +41,18 @@ $ node index.js https://www.wappalyzer.com
```javascript ```javascript
const options = { const options = {
userAgent: 'Wappalyzer',
maxWait: 3000,
debug: false, debug: false,
recursive: true, delay: 500,
maxDepth: 3, maxDepth: 3,
maxUrls: 10, maxUrls: 10,
delay: 500, maxWait: 3000,
recursive: true,
userAgent: 'Wappalyzer',
}; };
const wappalyzer = require('wappalyzer')(options); const wappalyzer = new Wappalyzer('https://www.wappalyzer.com', options);
wappalyzer.analyze('https://www.wappalyzer.com') wappalyzer.analyze()
.then(json => { .then(json => {
process.stdout.write(JSON.stringify(json, null, 2) + '\n') process.stdout.write(JSON.stringify(json, null, 2) + '\n')

@ -1,146 +1,161 @@
'use strict'; 'use strict';
const driver = options => { const Wappalyzer = require('./wappalyzer');
const Wappalyzer = require('./wappalyzer'); const request = require('request');
const request = require('request'); const url = require('url');
const url = require('url'); const fs = require('fs');
const fs = require('fs'); const Browser = require('zombie');
const Browser = require('zombie');
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
class Driver {
constructor(pageUrl, options) {
this.options = Object.assign({}, {
debug: false,
delay: 500,
maxDepth: 3,
maxUrls: 10,
maxWait: 3000,
recursive: false,
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
}, options || {});
this.origPageUrl = url.parse(pageUrl);
this.analyzedPageUrls = [];
this.apps = [];
this.wappalyzer = new Wappalyzer();
this.wappalyzer.apps = json.apps;
this.wappalyzer.categories = json.categories;
this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
this.wappalyzer.driver.displayApps = detected => this.displayApps(detected);
}
analyze() {
return this.crawl(this.origPageUrl);
}
log(message, source, type) {
if ( Boolean(this.options.debug) ) {
console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
}
}
/**
 * NOTE(review): most lines in this span carry two versions of the code, what
 * looks like the removed closure-based implementation followed by the new
 * Driver method. The description below covers the new displayApps() only.
 *
 * Records detected applications on this.apps.
 *
 * For each key of `detected`, builds a list of single-entry objects that map
 * a category id from app.props.cats to json.categories[id].name, then appends
 * a summary (name, confidence as a string, version, icon falling back to
 * 'default.svg', website, categories) unless this.apps already holds an
 * entry with the same name.
 *
 * @param {Object} detected - Detected apps keyed by app name.
 */
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json')); displayApps(detected) {
Object.keys(detected).forEach(appName => {
const app = detected[appName];
return { var categories = [];
analyze: pageUrl => {
const origPageUrl = url.parse(pageUrl);
const analyzedPageUrls = [];
const apps = [];
const wappalyzer = new Wappalyzer(); app.props.cats.forEach(id => {
var category = {};
wappalyzer.apps = json.apps; category[id] = json.categories[id].name;
wappalyzer.categories = json.categories;
wappalyzer.driver.log = (message, source, type) => { categories.push(category)
if ( Boolean(options.debug) ) { });
console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
}
};
wappalyzer.driver.displayApps = detected => { if ( !this.apps.some(detectedApp => detectedApp.name === app.name) ) {
Object.keys(detected).forEach(appName => { this.apps.push({
const app = detected[appName]; name: app.name,
confidence: app.confidenceTotal.toString(),
version: app.version,
icon: app.props.icon || 'default.svg',
website: app.props.website,
categories
});
}
});
}
/**
 * NOTE(review): most lines in this span carry two versions of the code, what
 * looks like the removed closure-based implementation followed by the new
 * Driver method. The description below covers the new fetch() only.
 *
 * Loads one page in a zombie Browser and runs the analysis on it.
 *
 * Resolves with undefined when pageUrl.href was already analyzed, when
 * this.options.maxUrls URLs have been recorded, or when the browser has no
 * response for its first resource. Otherwise it records the URL, waits
 * this.options.delay * index milliseconds (when delay is truthy), visits the
 * page, and passes the response headers, the HTML, the window's own property
 * names and the src of every script that has one to this.wappalyzer.analyze().
 * It then resolves with the page's <a> elements.
 *
 * @param {Object} pageUrl - URL object; its href and hostname are read.
 * @param {number} index - Multiplier applied to the delay.
 * @param {number} depth - Crawl depth; only used in the log message here.
 * @returns {Promise} Resolves with the anchor elements, or undefined.
 */
var categories = []; fetch(pageUrl, index, depth) {
return new Promise(async resolve => {
// Return when the URL is a duplicate or maxUrls has been reached
if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
return resolve();
}
app.props.cats.forEach(id => { this.analyzedPageUrls.push(pageUrl.href);
var category = {};
category[id] = wappalyzer.categories[id].name; this.wappalyzer.log('depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver');
categories.push(category) // Be nice
}); if ( this.options.delay ) {
await this.sleep(this.options.delay * index);
if ( !apps.some(detectedApp => detectedApp.name === app.name) ) { }
apps.push({
name: app.name,
confidence: app.confidenceTotal.toString(),
version: app.version,
icon: app.props.icon || 'default.svg',
website: app.props.website,
categories
});
}
});
};
const browser = new Browser({ const browser = new Browser({
userAgent: options.userAgent, userAgent: this.options.userAgent,
waitDuration: options.maxWait + 'ms', waitDuration: this.options.maxWait + 'ms',
}); });
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); browser.visit(pageUrl.href, error => {
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
const fetch = (pageUrl, index, depth) => { this.wappalyzer.log('No response from server', 'browser', 'error');
return new Promise(async (resolve, reject) => {
// Return when the URL is a duplicate or maxUrls has been reached
if ( analyzedPageUrls.indexOf(pageUrl.href) !== -1 || analyzedPageUrls.length >= options.maxUrls ) {
return resolve();
}
analyzedPageUrls.push(pageUrl.href);
wappalyzer.log('depth: ' + depth + '; delay: ' + ( options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver');
// Be nice return resolve();
if ( options.delay ) { }
await sleep(options.delay * index);
}
browser.visit(pageUrl.href, error => {
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
wappalyzer.log('No response from server', 'browser', 'error');
return resolve();
}
browser.wait()
.catch(error => wappalyzer.log(error.message, 'browser'))
.finally(() => {
wappalyzer.driver.document = browser.document;
const headers = {}; browser.wait()
.catch(error => this.wappalyzer.log(error.message, 'browser', 'error'))
.finally(() => {
const headers = {};
browser.resources['0'].response.headers._headers.forEach(header => { browser.resources['0'].response.headers._headers.forEach(header => {
if ( !headers[header[0]] ){ if ( !headers[header[0]] ){
headers[header[0]] = []; headers[header[0]] = [];
} }
headers[header[0]].push(header[1]); headers[header[0]].push(header[1]);
}); });
const vars = Object.getOwnPropertyNames(browser.window); const vars = Object.getOwnPropertyNames(browser.window);
const html = browser.html(); const html = browser.html();
const scripts = Array.prototype.slice const scripts = Array.prototype.slice
.apply(browser.document.scripts) .apply(browser.document.scripts)
.filter(s => s.src) .filter(s => s.src)
.map(s => s.src); .map(s => s.src);
this.wappalyzer.analyze(pageUrl.hostname, pageUrl.href, {
headers,
html,
env: vars,
scripts
});
wappalyzer.analyze(pageUrl.hostname, pageUrl.href, { const links = browser.body.getElementsByTagName('a');
headers,
html,
env: vars,
scripts
});
resolve(browser); resolve(links);
});
}); });
}); });
}; });
}
/**
 * NOTE(review): most lines in this span carry two versions of the code, what
 * looks like the removed closure-based implementation followed by the new
 * Driver method. The description below covers the new crawl() only.
 *
 * Fetches pageUrl and, when this.options.recursive is truthy, depth is below
 * this.options.maxDepth and fetch() returned links, crawls every link whose
 * hostname equals the start URL's hostname. Each link has its hash cleared
 * and is crawled with its array position + 1 as index and depth + 1 as depth.
 *
 * @param {Object} pageUrl - URL object to fetch.
 * @param {number} [index=1] - Passed to fetch() as the delay multiplier.
 * @param {number} [depth=1] - Current crawl depth.
 * @returns {Promise<Array>} Resolves with this.apps; rejects with any error
 *   caught while fetching or crawling.
 */
const crawl = async (pageUrl, index, depth) => { async crawl(pageUrl, index = 1, depth = 1) {
try { try {
const browser = await fetch(pageUrl, index, depth); var links = await this.fetch(pageUrl, index, depth);
if ( options.recursive && depth < options.maxDepth && browser ) { if ( this.options.recursive && depth < this.options.maxDepth && links ) {
const links = Array.from(browser.body.getElementsByTagName('a')).filter(link => link.hostname === origPageUrl.hostname); links = Array.from(links).filter(link => link.hostname === this.origPageUrl.hostname);
await Promise.all(links.map(async (link, index) => { await Promise.all(links.map(async (link, index) => {
link.hash = ''; link.hash = '';
return crawl(link, index, depth + 1); return this.crawl(link, index + 1, depth + 1);
})); }));
} }
return Promise.resolve(apps); return Promise.resolve(this.apps);
} catch (error) { } catch (error) {
return Promise.reject(error); return Promise.reject(error);
}
};
return crawl(origPageUrl, 1, 1);
} }
}; }
sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
}; };
module.exports = driver; module.exports = Driver;

@ -1,14 +1,6 @@
'use strict'; 'use strict';
const options = { const Wappalyzer = require('./driver');
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
maxWait: 3000,
debug: true,
recursive: true,
maxDepth: 3,
maxUrls: 10,
delay: 500,
};
const args = process.argv.slice(2); const args = process.argv.slice(2);
@ -20,6 +12,7 @@ if ( !url ) {
process.exit(1); process.exit(1);
} }
var options = {};
var arg; var arg;
while ( arg = args.shift() ) { while ( arg = args.shift() ) {
@ -29,13 +22,13 @@ while ( arg = args.shift() ) {
var key = matches[1].replace(/-\w/g, matches => matches[1].toUpperCase()); var key = matches[1].replace(/-\w/g, matches => matches[1].toUpperCase());
var value = matches[2]; var value = matches[2];
options.hasOwnProperty(key) && ( options[key] = value ); options[key] = value;
} }
} }
const wappalyzer = require('./driver')(options); const wappalyzer = new Wappalyzer(url, options);
wappalyzer.analyze(url) wappalyzer.analyze()
.then(json => { .then(json => {
process.stdout.write(JSON.stringify(json) + '\n') process.stdout.write(JSON.stringify(json) + '\n')