Add the ability to use a different headless browser in NPM driver

main
Elbert Alias 6 years ago
parent 879cb32d30
commit 2cb284594f

@ -2695,7 +2695,7 @@
"X-Drupal-Cache": "", "X-Drupal-Cache": "",
"X-Generator": "^Drupal(?:\\s([\\d.]+))?\\;version:\\1" "X-Generator": "^Drupal(?:\\s([\\d.]+))?\\;version:\\1"
}, },
"html": "<(?:link|style)[^>]+sites/(?:default|all)/(?:themes|modules)/", "html": "<(?:link|style)[^>]+\"/sites/(?:default|all)/(?:themes|modules)/",
"icon": "Drupal.svg", "icon": "Drupal.svg",
"implies": "PHP", "implies": "PHP",
"js": { "js": {

@ -0,0 +1,20 @@
/**
 * Base class describing the page state a headless-browser adapter exposes.
 *
 * The constructor stores `options` and initialises every result field to
 * `null`. Subclasses are expected to populate these fields once a page has
 * been loaded.
 */
class Browser {
  /**
   * @param {Object} options - Adapter options; stored unchanged on
   *   `this.options`.
   */
  constructor(options) {
    this.options = options;

    // Each result field is initialised exactly once.
    this.window = null;
    this.document = null;
    this.statusCode = null;
    this.contentType = null;
    this.headers = null;
    this.html = null;
    this.js = null;
    this.links = null;
    this.scripts = null;
    this.cookies = null;
  }

  /**
   * Default logger; intentionally does nothing.
   *
   * Subclasses report errors through `this.log(message, type)`. Callers that
   * want the output can assign their own function to `instance.log`, which
   * shadows this method.
   *
   * @param {string} message - Text to log.
   * @param {string} [type] - Severity label, e.g. 'error'.
   * @returns {void}
   */
  // eslint-disable-next-line class-methods-use-this, no-unused-vars
  log(message, type) {}
}
module.exports = Browser;

@ -0,0 +1,114 @@
const Zombie = require('zombie');
/**
 * Headless-browser adapter backed by Zombie.
 *
 * Creates one Zombie instance per adapter. After `visit()` settles, the
 * response status, content type, headers, HTML, script URLs and cookies are
 * available as plain properties.
 */
class Browser {
  /**
   * @param {Object} options - `proxy`, `userAgent` and `maxWait` configure the
   *   Zombie instance; `username` / `password` answer authentication requests.
   */
  constructor(options) {
    this.options = options;

    this.browser = new Zombie({
      proxy: options.proxy,
      silent: true,
      strictSSL: false,
      userAgent: options.userAgent,
      waitDuration: options.maxWait,
    });

    this.statusCode = null;
    this.contentType = null;
    this.headers = null;
    this.html = null;
    this.scripts = null;
    this.cookies = null;

    this.window = this.browser.window;
    this.document = this.browser.document;

    this.browser.on('authenticate', (auth) => {
      auth.username = this.options.username;
      auth.password = this.options.password;
    });
  }

  /**
   * Load `url` and populate the result properties.
   *
   * @param {string} url - Address to load.
   * @returns {Promise<void>} Resolves once all properties are populated;
   *   rejects if collecting the page state throws, so callers never wait on a
   *   promise that cannot settle.
   */
  visit(url) {
    return new Promise((resolve, reject) => {
      this.browser.visit(url, () => {
        try {
          const resource = this.getFirstResponse();

          // Refresh the references taken in the constructor; they were read
          // before any page was loaded.
          this.window = this.browser.window;
          this.document = this.browser.document;

          this.headers = this.getHeaders();
          this.statusCode = resource ? resource.response.status : 0;

          // Read the first value without shift(): shifting would remove the
          // Content-Type entry from `this.headers`.
          const contentTypes = this.headers['content-type'];

          this.contentType = contentTypes ? contentTypes[0] : null;

          this.html = this.getHtml();
          this.scripts = this.getScripts();
          this.cookies = this.getCookies();

          resolve();
        } catch (error) {
          reject(error);
        }
      });
    });
  }

  /**
   * Find the first loaded resource that carries a response.
   *
   * @returns {Object|null|undefined} The resource, or a falsy value when there
   *   is none.
   */
  getFirstResponse() {
    // NOTE(review): the fallback covers a missing resource list; confirm
    // whether Zombie can leave `resources` unset when no page was opened.
    const resources = this.browser.resources || [];

    return resources.length
      ? resources.filter(_resource => _resource.response).shift() : null;
  }

  /**
   * Group the first response's headers by name.
   *
   * @returns {Object<string, string[]>} Header name mapped to its values, in
   *   the order they were received; empty when there is no response.
   */
  getHeaders() {
    const headers = {};

    const resource = this.getFirstResponse();

    if (resource) {
      // eslint-disable-next-line no-underscore-dangle
      resource.response.headers._headers.forEach((header) => {
        if (!headers[header[0]]) {
          headers[header[0]] = [];
        }

        headers[header[0]].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Serialise the current document.
   *
   * @returns {string} The HTML, or '' when there is no document or
   *   serialisation fails.
   */
  getHtml() {
    let html = '';

    if (this.browser.document && this.browser.document.documentElement) {
      try {
        html = this.browser.html();
      } catch (error) {
        // This class defines no logger of its own; only report the failure
        // when the caller has attached one.
        if (typeof this.log === 'function') {
          this.log(error.message, 'error');
        }
      }
    }

    return html;
  }

  /**
   * Collect the `src` of every script element that has one.
   *
   * @returns {string[]} Script URLs; empty when there is no document.
   */
  getScripts() {
    if (!this.browser.document || !this.browser.document.scripts) {
      return [];
    }

    const scripts = Array.prototype.slice
      .apply(this.browser.document.scripts)
      .filter(script => script.src)
      .map(script => script.src);

    return scripts;
  }

  /**
   * Copy the browser's cookies into plain objects.
   *
   * @returns {Array<{name: *, value: *, domain: *, path: *}>} One entry per
   *   cookie; empty when the browser exposes none.
   */
  getCookies() {
    const cookies = [];

    if (this.browser.cookies) {
      this.browser.cookies.forEach(cookie => cookies.push({
        name: cookie.key,
        value: cookie.value,
        domain: cookie.domain,
        path: cookie.path,
      }));
    }

    return cookies;
  }
}
export default Browser;

@ -0,0 +1,119 @@
const Zombie = require('zombie');
const Browser = require('../browser');
/**
 * Browser adapter that loads pages with Zombie.
 *
 * Extends the generic `Browser` base class. After `visit()` settles, the
 * status code, content type, headers, HTML, window object, links, script URLs
 * and cookies are available as plain properties.
 */
class ZombieBrowser extends Browser {
  /**
   * @param {Object} options - `proxy`, `userAgent` and `maxWait` configure the
   *   Zombie instance; `username` / `password` answer authentication requests.
   */
  constructor(options) {
    super(options);

    this.browser = new Zombie({
      proxy: options.proxy,
      silent: true,
      strictSSL: false,
      userAgent: options.userAgent,
      waitDuration: options.maxWait,
    });

    this.browser.on('authenticate', (auth) => {
      auth.username = this.options.username;
      auth.password = this.options.password;
    });
  }

  /**
   * Load `url` and populate the result properties.
   *
   * @param {string} url - Address to load.
   * @returns {Promise<void>} Resolves once all properties are populated;
   *   rejects if collecting the page state throws, so callers never wait on a
   *   promise that cannot settle.
   */
  visit(url) {
    return new Promise((resolve, reject) => {
      this.browser.visit(url, () => {
        try {
          const resource = this.getFirstResponse();

          this.headers = this.getHeaders();
          this.statusCode = resource ? resource.response.status : 0;

          // Read the first value without shift(): shifting would remove the
          // Content-Type entry from `this.headers`.
          const contentTypes = this.headers['content-type'];

          this.contentType = contentTypes ? contentTypes[0] : null;

          this.html = this.getHtml();
          this.js = this.getJs();
          this.links = this.getLinks();
          this.scripts = this.getScripts();
          this.cookies = this.getCookies();

          resolve();
        } catch (error) {
          reject(error);
        }
      });
    });
  }

  /**
   * Find the first loaded resource that carries a response.
   *
   * @returns {Object|null|undefined} The resource, or a falsy value when there
   *   is none.
   */
  getFirstResponse() {
    // NOTE(review): the fallback covers a missing resource list; confirm
    // whether Zombie can leave `resources` unset when no page was opened.
    const resources = this.browser.resources || [];

    return resources.length
      ? resources.filter(_resource => _resource.response).shift() : null;
  }

  /**
   * Group the first response's headers by name.
   *
   * @returns {Object<string, string[]>} Header name mapped to its values, in
   *   the order they were received; empty when there is no response.
   */
  getHeaders() {
    const headers = {};

    const resource = this.getFirstResponse();

    if (resource) {
      // eslint-disable-next-line no-underscore-dangle
      resource.response.headers._headers.forEach((header) => {
        if (!headers[header[0]]) {
          headers[header[0]] = [];
        }

        headers[header[0]].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Serialise the current document.
   *
   * @returns {string} The HTML, or '' when there is no document or
   *   serialisation fails.
   */
  getHtml() {
    let html = '';

    if (this.browser.document && this.browser.document.documentElement) {
      try {
        html = this.browser.html();
      } catch (error) {
        // A logger is attached by the caller after construction; skip
        // reporting when none is present instead of throwing from here.
        if (typeof this.log === 'function') {
          this.log(error.message, 'error');
        }
      }
    }

    return html;
  }

  /**
   * Collect the `src` of every script element that has one.
   *
   * @returns {string[]} Script URLs; empty when there is no document.
   */
  getScripts() {
    let scripts = [];

    if (this.browser.document && this.browser.document.scripts) {
      scripts = Array.prototype.slice
        .apply(this.browser.document.scripts)
        .filter(script => script.src)
        .map(script => script.src);
    }

    return scripts;
  }

  /**
   * @returns {*} The Zombie browser's current `window` value.
   */
  getJs() {
    return this.browser.window;
  }

  /**
   * Collect the anchor elements of the current document.
   *
   * @returns {Array} Every `<a>` element; empty when there is no document.
   */
  getLinks() {
    let links = [];

    if (this.browser.document) {
      links = Array.from(this.browser.document.getElementsByTagName('a'));
    }

    return links;
  }

  /**
   * Copy the browser's cookies into plain objects.
   *
   * @returns {Array<{name: *, value: *, domain: *, path: *}>} One entry per
   *   cookie; empty when the browser exposes none.
   */
  getCookies() {
    const cookies = [];

    if (this.browser.cookies) {
      this.browser.cookies.forEach(cookie => cookies.push({
        name: cookie.key,
        value: cookie.value,
        domain: cookie.domain,
        path: cookie.path,
      }));
    }

    return cookies;
  }
}
module.exports = ZombieBrowser;

@ -1,7 +1,6 @@
const url = require('url'); const url = require('url');
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const Browser = require('zombie');
const Wappalyzer = require('./wappalyzer'); const Wappalyzer = require('./wappalyzer');
const json = JSON.parse(fs.readFileSync(path.resolve(`${__dirname}/apps.json`))); const json = JSON.parse(fs.readFileSync(path.resolve(`${__dirname}/apps.json`)));
@ -18,56 +17,8 @@ function sleep(ms) {
return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve(); return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve();
} }
/**
 * Group the headers of the first resource that has a response.
 *
 * @param {Object} browser - Object exposing a `resources` list.
 * @returns {Object<string, Array>} Header name mapped to the list of its
 *   values; empty when no resource carries a response.
 */
function getHeaders(browser) {
  if (!browser.resources.length) {
    return {};
  }

  const firstWithResponse = browser.resources
    .filter(entry => entry.response)
    .shift();

  if (!firstWithResponse) {
    return {};
  }

  // eslint-disable-next-line no-underscore-dangle
  const pairs = firstWithResponse.response.headers._headers;

  return pairs.reduce((grouped, pair) => {
    const name = pair[0];

    if (!grouped[name]) {
      grouped[name] = [];
    }

    grouped[name].push(pair[1]);

    return grouped;
  }, {});
}
/**
 * List the `src` of every script in the browser's document that has one.
 *
 * @param {Object} browser - Object exposing an optional `document` whose
 *   `scripts` member is array-like.
 * @returns {Array} The truthy `src` values in document order; empty when the
 *   document or its script list is missing.
 */
function getScripts(browser) {
  const hasScripts = Boolean(browser.document && browser.document.scripts);

  if (hasScripts) {
    return Array.prototype.reduce.call(
      browser.document.scripts,
      (sources, element) => {
        if (element.src) {
          sources.push(element.src);
        }

        return sources;
      },
      [],
    );
  }

  return [];
}
/**
 * Copy the browser's cookies into plain `{ name, value, domain, path }`
 * objects.
 *
 * @param {Object} browser - Object exposing an optional `cookies` collection
 *   that provides `forEach`.
 * @returns {Array<Object>} One entry per cookie; empty when `cookies` is
 *   missing.
 */
function getCookies(browser) {
  const collected = [];

  if (!browser.cookies) {
    return collected;
  }

  browser.cookies.forEach((cookie) => {
    const {
      key,
      value,
      domain,
      path,
    } = cookie;

    collected.push({
      name: key,
      value,
      domain,
      path,
    });
  });

  return collected;
}
class Driver { class Driver {
constructor(pageUrl, options) { constructor(Browser, pageUrl, options) {
this.options = Object.assign({}, { this.options = Object.assign({}, {
password: '', password: '',
proxy: null, proxy: null,
@ -98,6 +49,8 @@ class Driver {
this.apps = []; this.apps = [];
this.meta = {}; this.meta = {};
this.Browser = Browser;
this.wappalyzer = new Wappalyzer(); this.wappalyzer = new Wappalyzer();
this.wappalyzer.apps = json.apps; this.wappalyzer.apps = json.apps;
@ -175,119 +128,70 @@ class Driver {
this.timer(`fetch; url: ${pageUrl.href}; depth: ${depth}; delay: ${this.options.delay * index}ms`, timerScope); this.timer(`fetch; url: ${pageUrl.href}; depth: ${depth}; delay: ${this.options.delay * index}ms`, timerScope);
return new Promise((resolve, reject) => { return new Promise(async (resolve, reject) => {
sleep(this.options.delay * index) await sleep(this.options.delay * index);
.then(() => this.visit(pageUrl, timerScope, resolve, reject));
});
}
visit(pageUrl, timerScope, resolve, reject) {
const browser = new Browser({
proxy: this.options.proxy,
silent: true,
strictSSL: false,
userAgent: this.options.userAgent,
waitDuration: this.options.maxWait,
});
browser.on('authenticate', (auth) => { this.visit(pageUrl, timerScope, resolve, reject);
auth.username = this.options.username;
auth.password = this.options.password;
}); });
}
this.timer(`browser.visit start; url: ${pageUrl.href}`, timerScope); async visit(pageUrl, timerScope, resolve, reject) {
const browser = new this.Browser(this.options);
browser.visit(pageUrl.href, () => { browser.log = (message, type) => this.wappalyzer.log(message, 'browser', type);
this.timer(`browser.visit end; url: ${pageUrl.href}`, timerScope);
try { this.timer(`visit start; url: ${pageUrl.href}`, timerScope);
if (!this.checkResponse(browser, pageUrl)) {
resolve();
return; await browser.visit(pageUrl.href);
}
} catch (error) {
reject(error);
return; this.timer(`visit end; url: ${pageUrl.href}`, timerScope);
}
const headers = getHeaders(browser); this.analyzedPageUrls[pageUrl.href].status = browser.statusCode;
const html = this.getHtml(browser);
const scripts = getScripts(browser);
const js = this.getJs(browser);
const cookies = getCookies(browser);
this.wappalyzer.analyze(pageUrl, {
headers,
html,
scripts,
js,
cookies,
})
.then(() => {
const links = Array.prototype.reduce.call(
browser.document.getElementsByTagName('a'), (results, link) => {
if (link.protocol.match(/https?:/) && link.hostname === this.origPageUrl.hostname && extensions.test(link.pathname)) {
link.hash = '';
results.push(url.parse(link.href));
}
return results;
}, [],
);
return resolve(links);
});
});
}
checkResponse(browser, pageUrl) {
// Validate response // Validate response
const resource = browser.resources.length if (!browser.statusCode) {
? browser.resources.filter(_resource => _resource.response).shift() : null; reject(new Error('NO_RESPONSE'));
}
if (!resource) { if (browser.statusCode !== 200) {
throw new Error('NO_RESPONSE'); reject(new Error('RESPONSE_NOT_OK'));
} }
this.analyzedPageUrls[pageUrl.href].status = resource.response.status; if (!browser.contentType || !/\btext\/html\b/.test(browser.contentType)) {
this.wappalyzer.log(`Skipping; url: ${pageUrl.href}; content type: ${browser.contentType}`, 'driver');
if (resource.response.status !== 200) { delete this.analyzedPageUrls[pageUrl.href];
throw new Error('RESPONSE_NOT_OK');
} }
const headers = getHeaders(browser); const { cookies, headers, scripts } = browser;
// Validate content type const html = this.processHtml(browser.html);
const contentType = headers['content-type'] ? headers['content-type'].shift() : null; const js = this.processJs(browser.js);
if (!contentType || !/\btext\/html\b/.test(contentType)) { await this.wappalyzer.analyze(pageUrl, {
this.wappalyzer.log(`Skipping; url: ${pageUrl.href}; content type: ${contentType}`, 'driver'); cookies,
headers,
html,
js,
scripts,
});
delete this.analyzedPageUrls[pageUrl.href]; const reducedLinks = Array.prototype.reduce.call(
browser.links, (results, link) => {
if (link.protocol.match(/https?:/) && link.hostname === this.origPageUrl.hostname && extensions.test(link.pathname)) {
link.hash = '';
return false; results.push(url.parse(link.href));
} }
// Validate document return results;
if (!browser.document || !browser.document.documentElement) { }, [],
throw new Error('NO_HTML_DOCUMENT'); );
}
return true; return resolve(reducedLinks);
} }
getHtml(browser) { processHtml(html) {
let html = '';
try {
html = browser.html();
} catch (error) {
this.wappalyzer.log(error.message, 'browser', 'error');
}
if (this.options.htmlMaxCols || this.options.htmlMaxRows) { if (this.options.htmlMaxCols || this.options.htmlMaxRows) {
const chunks = []; const chunks = [];
const maxCols = this.options.htmlMaxCols; const maxCols = this.options.htmlMaxCols;
@ -308,7 +212,7 @@ class Driver {
return html; return html;
} }
getJs(browser) { processJs(window) {
const patterns = this.wappalyzer.jsPatterns; const patterns = this.wappalyzer.jsPatterns;
const js = {}; const js = {};
@ -323,7 +227,7 @@ class Driver {
let value = properties let value = properties
.reduce((parent, property) => (parent && parent[property] .reduce((parent, property) => (parent && parent[property]
? parent[property] : null), browser.window); ? parent[property] : null), window);
value = typeof value === 'string' || typeof value === 'number' ? value : !!value; value = typeof value === 'string' || typeof value === 'number' ? value : !!value;
@ -340,32 +244,32 @@ class Driver {
crawl(pageUrl, index = 1, depth = 1) { crawl(pageUrl, index = 1, depth = 1) {
pageUrl.canonical = `${pageUrl.protocol}//${pageUrl.host}${pageUrl.pathname}`; pageUrl.canonical = `${pageUrl.protocol}//${pageUrl.host}${pageUrl.pathname}`;
return new Promise((resolve) => { return new Promise(async (resolve) => {
this.fetch(pageUrl, index, depth) let links;
.catch((error) => {
const type = error.message && errorTypes[error.message] ? error.message : 'UNKNOWN_ERROR'; try {
const message = error.message && errorTypes[error.message] ? errorTypes[error.message] : 'Unknown error'; links = await this.fetch(pageUrl, index, depth);
} catch (error) {
this.analyzedPageUrls[pageUrl.href].error = { const type = error.message && errorTypes[error.message] ? error.message : 'UNKNOWN_ERROR';
type, const message = error.message && errorTypes[error.message] ? errorTypes[error.message] : 'Unknown error';
message,
}; this.analyzedPageUrls[pageUrl.href].error = {
type,
this.wappalyzer.log(`${message}; url: ${pageUrl.href}`, 'driver', 'error'); message,
}) };
.then((links) => {
if (links && this.options.recursive && depth < this.options.maxDepth) { this.wappalyzer.log(`${message}; url: ${pageUrl.href}`, 'driver', 'error');
return this.chunk(links.slice(0, this.options.maxUrls), depth + 1); }
}
return Promise.resolve(); if (links && this.options.recursive && depth < this.options.maxDepth) {
}) await this.chunk(links.slice(0, this.options.maxUrls), depth + 1);
.then(() => { }
resolve({
urls: this.analyzedPageUrls, return resolve({
applications: this.apps, urls: this.analyzedPageUrls,
meta: this.meta, applications: this.apps,
}); meta: this.meta,
}); });
}); });
} }
@ -376,10 +280,12 @@ class Driver {
const chunked = links.splice(0, this.options.chunkSize); const chunked = links.splice(0, this.options.chunkSize);
return new Promise((resolve) => { return new Promise(async (resolve) => {
Promise.all(chunked.map((link, index) => this.crawl(link, index, depth))) await Promise.all(chunked.map((link, index) => this.crawl(link, index, depth)));
.then(() => this.chunk(links, depth, chunk + 1))
.then(() => resolve()); await this.chunk(links, depth, chunk + 1);
resolve();
}); });
} }

@ -1,6 +1,6 @@
#!/usr/bin/env node #!/usr/bin/env node
const Browser = require('./browsers/zombie');
const Wappalyzer = require('./driver'); const Wappalyzer = require('./driver');
const args = process.argv.slice(2); const args = process.argv.slice(2);
@ -30,7 +30,10 @@ do {
} }
} while (arg); } while (arg);
const wappalyzer = new Wappalyzer(url, options); const wappalyzer = new Wappalyzer(Browser, url, options);
// Optionally define a custom log function
// wappalyzer.log = (message, source, type) => console.log(message);
wappalyzer.analyze() wappalyzer.analyze()
.then((json) => { .then((json) => {

@ -1,6 +1,6 @@
{ {
"name": "wappalyzer", "name": "wappalyzer",
"version": "5.5.6", "version": "5.5.7",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

@ -2,7 +2,7 @@
"name": "wappalyzer", "name": "wappalyzer",
"description": "Uncovers the technologies used on websites", "description": "Uncovers the technologies used on websites",
"homepage": "https://github.com/AliasIO/Wappalyzer", "homepage": "https://github.com/AliasIO/Wappalyzer",
"version": "5.5.7", "version": "5.6.0",
"author": "Elbert Alias", "author": "Elbert Alias",
"license": "GPL-3.0", "license": "GPL-3.0",
"repository": { "repository": {

@ -4,7 +4,7 @@
"author": "Elbert Alias", "author": "Elbert Alias",
"homepage_url": "https://www.wappalyzer.com", "homepage_url": "https://www.wappalyzer.com",
"description": "Identify web technologies", "description": "Identify web technologies",
"version": "5.5.7", "version": "5.6.0",
"default_locale": "en", "default_locale": "en",
"manifest_version": 2, "manifest_version": 2,
"icons": { "icons": {

@ -160,7 +160,7 @@ class Wappalyzer {
this.detected[url.canonical] = {}; this.detected[url.canonical] = {};
} }
const metas = []; const metaTags = [];
// Additional information // Additional information
let language = null; let language = null;
@ -170,23 +170,22 @@ class Wappalyzer {
html = ''; html = '';
} }
const matches = data.html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i); let matches = data.html.match(new RegExp('<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i'));
language = matches && matches.length ? matches[1] : null; language = matches && matches.length ? matches[1] : null;
// grab metas // Meta tags
const regex = /<meta[^>]+>/ig; const regex = /<meta[^>]+>/ig;
let metaMatches;
do { do {
metaMatches = regex.exec(html); matches = regex.exec(html);
if (!metaMatches) { if (!matches) {
break; break;
} }
const [match] = metaMatches; metaTags.push(matches[0]);
metas.push(match); } while (matches);
} while (metaMatches);
} }
Object.keys(this.apps).forEach((appName) => { Object.keys(this.apps).forEach((appName) => {
@ -200,7 +199,7 @@ class Wappalyzer {
if (html) { if (html) {
promises.push(this.analyzeHtml(app, html)); promises.push(this.analyzeHtml(app, html));
promises.push(this.analyzeMeta(app, metas)); promises.push(this.analyzeMeta(app, metaTags));
} }
if (scripts) { if (scripts) {
@ -224,33 +223,32 @@ class Wappalyzer {
}); });
} }
return new Promise((resolve) => { return new Promise(async (resolve) => {
Promise.all(promises) await Promise.all(promises);
.then(() => {
Object.keys(apps).forEach((appName) => {
const app = apps[appName];
if (!app.detected || !app.getConfidence()) { Object.keys(apps).forEach((appName) => {
delete apps[app.name]; const app = apps[appName];
}
});
resolveExcludes(apps, this.detected[url]); if (!app.detected || !app.getConfidence()) {
this.resolveImplies(apps, url.canonical); delete apps[app.name];
}
});
this.cacheDetectedApps(apps, url.canonical); resolveExcludes(apps, this.detected[url]);
this.trackDetectedApps(apps, url, language); this.resolveImplies(apps, url.canonical);
this.log(`Processing ${Object.keys(data).join(', ')} took ${((new Date() - startTime) / 1000).toFixed(2)}s (${url.hostname})`, 'core'); this.cacheDetectedApps(apps, url.canonical);
this.trackDetectedApps(apps, url, language);
if (Object.keys(apps).length) { this.log(`Processing ${Object.keys(data).join(', ')} took ${((new Date() - startTime) / 1000).toFixed(2)}s (${url.hostname})`, 'core');
this.log(`Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, 'core');
}
this.driver.displayApps(this.detected[url.canonical], { language }, context); if (Object.keys(apps).length) {
this.log(`Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, 'core');
}
return resolve(); this.driver.displayApps(this.detected[url.canonical], { language }, context);
});
return resolve();
}); });
} }
@ -265,23 +263,20 @@ class Wappalyzer {
* *
*/ */
robotsTxtAllows(url) { robotsTxtAllows(url) {
return new Promise((resolve, reject) => { return new Promise(async (resolve, reject) => {
const parsed = this.parseUrl(url); const parsed = this.parseUrl(url);
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
reject(); return reject();
return;
} }
this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:') const robotsTxt = await this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:');
.then((robotsTxt) => {
if (robotsTxt.some(disallowedPath => parsed.pathname.indexOf(disallowedPath) === 0)) { if (robotsTxt.some(disallowedPath => parsed.pathname.indexOf(disallowedPath) === 0)) {
return reject(); return reject();
} }
return resolve(); return resolve();
}, () => resolve());
}); });
} }
@ -376,10 +371,10 @@ class Wappalyzer {
try { try {
attrs.regex = new RegExp(attr.replace('/', '\\/'), 'i'); // Escape slashes in regular expression attrs.regex = new RegExp(attr.replace('/', '\\/'), 'i'); // Escape slashes in regular expression
} catch (e) { } catch (error) {
attrs.regex = new RegExp(); attrs.regex = new RegExp();
this.log(`${e}: ${attr}`, 'error', 'core'); this.log(`${error.message}: ${attr}`, 'error', 'core');
} }
} }
}); });
@ -572,7 +567,7 @@ class Wappalyzer {
/** /**
* Analyze meta tag * Analyze meta tag
*/ */
analyzeMeta(app, metas) { analyzeMeta(app, metaTags) {
const patterns = this.parsePatterns(app.props.meta); const patterns = this.parsePatterns(app.props.meta);
const promises = []; const promises = [];
@ -580,7 +575,7 @@ class Wappalyzer {
return Promise.resolve(); return Promise.resolve();
} }
metas.forEach((match) => { metaTags.forEach((match) => {
Object.keys(patterns).forEach((meta) => { Object.keys(patterns).forEach((meta) => {
const r = new RegExp(`(?:name|property)=["']${meta}["']`, 'i'); const r = new RegExp(`(?:name|property)=["']${meta}["']`, 'i');

Loading…
Cancel
Save