|
|
@ -3,32 +3,30 @@
|
|
|
|
const driver = options => {
|
|
|
|
const driver = options => {
|
|
|
|
const Wappalyzer = require('./wappalyzer');
|
|
|
|
const Wappalyzer = require('./wappalyzer');
|
|
|
|
const request = require('request');
|
|
|
|
const request = require('request');
|
|
|
|
|
|
|
|
const url = require('url');
|
|
|
|
const fs = require('fs');
|
|
|
|
const fs = require('fs');
|
|
|
|
const Browser = require('zombie');
|
|
|
|
const Browser = require('zombie');
|
|
|
|
|
|
|
|
|
|
|
|
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
|
|
|
|
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
return {
|
|
|
|
analyze: url => {
|
|
|
|
analyze: pageUrl => {
|
|
|
|
|
|
|
|
const origPageUrl = url.parse(pageUrl);
|
|
|
|
|
|
|
|
const analyzedPageUrls = [];
|
|
|
|
|
|
|
|
const apps = [];
|
|
|
|
|
|
|
|
|
|
|
|
const wappalyzer = new Wappalyzer();
|
|
|
|
const wappalyzer = new Wappalyzer();
|
|
|
|
|
|
|
|
|
|
|
|
wappalyzer.apps = json.apps;
|
|
|
|
wappalyzer.apps = json.apps;
|
|
|
|
wappalyzer.categories = json.categories;
|
|
|
|
wappalyzer.categories = json.categories;
|
|
|
|
|
|
|
|
|
|
|
|
return new Promise((resolve, reject) => {
|
|
|
|
|
|
|
|
wappalyzer.driver.log = (message, source, type) => {
|
|
|
|
wappalyzer.driver.log = (message, source, type) => {
|
|
|
|
if ( type === 'error' ) {
|
|
|
|
|
|
|
|
return reject(message);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ( Boolean(options.debug) ) {
|
|
|
|
if ( Boolean(options.debug) ) {
|
|
|
|
console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
|
|
|
|
console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
wappalyzer.driver.displayApps = detected => {
|
|
|
|
wappalyzer.driver.displayApps = detected => {
|
|
|
|
var apps = [];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Object.keys(detected).forEach(appName => {
|
|
|
|
Object.keys(detected).forEach(appName => {
|
|
|
|
const app = detected[appName];
|
|
|
|
const app = detected[appName];
|
|
|
|
|
|
|
|
|
|
|
@ -42,6 +40,7 @@ const driver = options => {
|
|
|
|
categories.push(category)
|
|
|
|
categories.push(category)
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ( !apps.some(detectedApp => detectedApp.name === app.name) ) {
|
|
|
|
apps.push({
|
|
|
|
apps.push({
|
|
|
|
name: app.name,
|
|
|
|
name: app.name,
|
|
|
|
confidence: app.confidenceTotal.toString(),
|
|
|
|
confidence: app.confidenceTotal.toString(),
|
|
|
@ -50,21 +49,41 @@ const driver = options => {
|
|
|
|
website: app.props.website,
|
|
|
|
website: app.props.website,
|
|
|
|
categories
|
|
|
|
categories
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
}
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
resolve(apps);
|
|
|
|
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
const browser = new Browser({
|
|
|
|
const browser = new Browser({
|
|
|
|
userAgent: options.userAgent
|
|
|
|
userAgent: options.userAgent,
|
|
|
|
|
|
|
|
waitDuration: options.maxWait + 'ms',
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
browser.visit(url, error => {
|
|
|
|
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const fetch = (pageUrl, index, depth) => {
|
|
|
|
|
|
|
|
return new Promise(async (resolve, reject) => {
|
|
|
|
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
|
|
|
|
if ( analyzedPageUrls.indexOf(pageUrl.href) !== -1 || analyzedPageUrls.length >= options.maxUrls ) {
|
|
|
|
|
|
|
|
return resolve();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
analyzedPageUrls.push(pageUrl.href);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wappalyzer.log('depth: ' + depth + '; delay: ' + ( options.delay * index ) + 'ms; url: ' + pageUrl.href, 'driver');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Be nice
|
|
|
|
|
|
|
|
if ( options.delay ) {
|
|
|
|
|
|
|
|
await sleep(options.delay * index);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
browser.visit(pageUrl.href, error => {
|
|
|
|
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
|
|
|
|
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
|
|
|
|
return wappalyzer.log('No response from server', 'driver', 'error');
|
|
|
|
wappalyzer.log('No response from server', 'browser', 'error');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return resolve();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
browser.wait(options.maxWait)
|
|
|
|
browser.wait()
|
|
|
|
.catch(error => wappalyzer.log(error.message, 'browser'))
|
|
|
|
.catch(error => wappalyzer.log(error.message, 'browser'))
|
|
|
|
.finally(() => {
|
|
|
|
.finally(() => {
|
|
|
|
wappalyzer.driver.document = browser.document;
|
|
|
|
wappalyzer.driver.document = browser.document;
|
|
|
@ -75,6 +94,7 @@ const driver = options => {
|
|
|
|
if ( !headers[header[0]] ){
|
|
|
|
if ( !headers[header[0]] ){
|
|
|
|
headers[header[0]] = [];
|
|
|
|
headers[header[0]] = [];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
headers[header[0]].push(header[1]);
|
|
|
|
headers[header[0]].push(header[1]);
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
@ -85,17 +105,40 @@ const driver = options => {
|
|
|
|
.filter(s => s.src)
|
|
|
|
.filter(s => s.src)
|
|
|
|
.map(s => s.src);
|
|
|
|
.map(s => s.src);
|
|
|
|
|
|
|
|
|
|
|
|
const hostname = wappalyzer.parseUrl(url).hostname;
|
|
|
|
wappalyzer.analyze(pageUrl.hostname, pageUrl.href, {
|
|
|
|
|
|
|
|
|
|
|
|
wappalyzer.analyze(hostname, url, {
|
|
|
|
|
|
|
|
headers,
|
|
|
|
headers,
|
|
|
|
html,
|
|
|
|
html,
|
|
|
|
env: vars,
|
|
|
|
env: vars,
|
|
|
|
scripts
|
|
|
|
scripts
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resolve(browser);
|
|
|
|
});
|
|
|
|
});
|
|
|
|
});
|
|
|
|
});
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const crawl = async (pageUrl, index, depth) => {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
const browser = await fetch(pageUrl, index, depth);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ( options.recursive && depth < options.maxDepth && browser ) {
|
|
|
|
|
|
|
|
const links = Array.from(browser.body.getElementsByTagName('a')).filter(link => link.hostname === origPageUrl.hostname);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
await Promise.all(links.map(async (link, index) => {
|
|
|
|
|
|
|
|
link.hash = '';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return crawl(link, index, depth + 1);
|
|
|
|
|
|
|
|
}));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return Promise.resolve(apps);
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
return Promise.reject(error);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return crawl(origPageUrl, 1, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
};
|
|
|
|
};
|
|
|
|