Build 5.4.3, NPM performance improvements, removed requestTimeout option

main
Elbert Alias 7 years ago
parent 3f42859e49
commit 77574d548c

@ -27,14 +27,13 @@ node index.js [url] [options]
### Options ### Options
``` ```
--debug=0|1 Output debug messages. --debug=0|1 Output debug messages.
--delay=ms Wait for ms milliseconds between requests. --delay=ms Wait for ms milliseconds between requests.
--max-depth=num Don't analyze pages more than num levels deep. --max-depth=num Don't analyze pages more than num levels deep.
--max-urls=num Exit when num URLs have been analyzed. --max-urls=num Exit when num URLs have been analyzed.
--max-wait=ms Wait no more than ms milliseconds for page resources to load. --max-wait=ms Wait no more than ms milliseconds for page resources to load.
--recursive=0|1 Follow links on pages (crawler). --recursive=0|1 Follow links on pages (crawler).
--request-timeout=ms Wait no more than ms milliseconds for the page to load. --user-agent=str Set the user agent string.
--user-agent=str Set the user agent string.
``` ```
@ -46,9 +45,8 @@ const options = {
delay: 500, delay: 500,
maxDepth: 3, maxDepth: 3,
maxUrls: 10, maxUrls: 10,
maxWait: 1000, maxWait: 5000,
recursive: true, recursive: true,
requestTimeout: 3000,
userAgent: 'Wappalyzer', userAgent: 'Wappalyzer',
}; };

@ -17,9 +17,8 @@ class Driver {
delay: 500, delay: 500,
maxDepth: 3, maxDepth: 3,
maxUrls: 10, maxUrls: 10,
maxWait: 1000, maxWait: 5000,
recursive: false, recursive: false,
requestTimeout: 3000,
userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)', userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
}, options || {}); }, options || {});
@ -29,7 +28,6 @@ class Driver {
this.options.maxUrls = parseInt(this.options.maxUrls, 10); this.options.maxUrls = parseInt(this.options.maxUrls, 10);
this.options.maxWait = parseInt(this.options.maxWait, 10); this.options.maxWait = parseInt(this.options.maxWait, 10);
this.options.recursive = Boolean(this.options.recursive); this.options.recursive = Boolean(this.options.recursive);
this.options.requestTimeout = parseInt(this.options.requestTimeout, 10);
this.origPageUrl = url.parse(pageUrl); this.origPageUrl = url.parse(pageUrl);
this.analyzedPageUrls = []; this.analyzedPageUrls = [];
@ -61,8 +59,6 @@ class Driver {
} }
displayApps(detected, meta) { displayApps(detected, meta) {
this.timer('displayApps');
this.meta = meta; this.meta = meta;
Object.keys(detected).forEach(appName => { Object.keys(detected).forEach(appName => {
@ -92,95 +88,112 @@ class Driver {
} }
fetch(pageUrl, index, depth) { fetch(pageUrl, index, depth) {
return new Promise(resolve => { // Return when the URL is a duplicate or maxUrls has been reached
// Return when the URL is a duplicate or maxUrls has been reached if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) { return Promise.resolve();
return resolve(); }
}
this.timer('fetch url: ' + pageUrl.href + '; depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms'); this.analyzedPageUrls.push(pageUrl.href);
this.analyzedPageUrls.push(pageUrl.href); const timerScope = {
last: new Date().getTime()
};
const browser = new Browser({ this.timer('fetch; url: ' + pageUrl.href + '; depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms', timerScope);
silent: true,
userAgent: this.options.userAgent,
waitDuration: this.options.maxWait,
});
this.sleep(this.options.delay * index) return new Promise(resolve => this.sleep(this.options.delay * index).then(() => this.visit(pageUrl, timerScope, resolve)));
.then(() => { }
this.timer('browser.visit start url: ' + pageUrl.href);
browser.visit(pageUrl.href, this.options.requestTimeout, error => { visit(pageUrl, timerScope, resolve) {
this.timer('browser.visit end url: ' + pageUrl.href); const browser = new Browser({
silent: true,
userAgent: this.options.userAgent,
waitDuration: this.options.maxWait,
});
pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname; this.timer('browser.visit start; url: ' + pageUrl.href, timerScope);
// Validate response browser.visit(pageUrl.href, () => {
if ( !browser.resources['0'] || !browser.resources['0'].response ) { this.timer('browser.visit end; url: ' + pageUrl.href, timerScope);
this.wappalyzer.log('No response from server', 'browser', 'error');
return resolve(); if ( !this.responseOk(browser, pageUrl) ) {
} return resolve();
}
const headers = this.getHeaders(browser); const headers = this.getHeaders(browser);
const html = this.getHtml(browser);
const scripts = this.getScripts(browser);
const js = this.getJs(browser);
// Validate content type this.wappalyzer.analyze(pageUrl, {
const contentType = headers.hasOwnProperty('content-type') ? headers['content-type'].shift() : null; headers,
html,
scripts,
js
});
if ( !contentType || !/\btext\/html\b/.test(contentType) ) { const links = Array.from(browser.document.getElementsByTagName('a'))
this.wappalyzer.log('Skipping ' + pageUrl.href + ' of content type ' + contentType, 'driver'); .filter(link => link.hostname === this.origPageUrl.hostname)
.filter(link => extensions.test(link.pathname))
.map(link => { link.hash = ''; return url.parse(link.href) });
this.analyzedPageUrls.splice(this.analyzedPageUrls.indexOf(pageUrl.href), 1); return resolve(links);
});
}
return resolve(); responseOk(browser, pageUrl) {
} // Validate response
const resource = browser.resources.length ? browser.resources.filter(resource => resource.response).shift() : null;
// Validate document element if ( !resource ) {
if ( !browser.document || !browser.document.documentElement ) { this.wappalyzer.log('No response from server; url: ' + pageUrl.href, 'driver', 'error');
this.wappalyzer.log('No HTML document at ' + pageUrl.href, 'driver', 'error');
return resolve(); return false;
} }
const html = this.getHtml(browser); if ( resource.response.status !== 200 ) {
const scripts = this.getScripts(browser); this.wappalyzer.log('Response was not OK; status: ' + resource.response.status + ' ' + resource.response.statusText + '; url: ' + pageUrl.href, 'driver', 'error');
const links = Array.from(browser.document.getElementsByTagName('a')) return false;
.filter(link => link.hostname === this.origPageUrl.hostname) }
.filter(link => extensions.test(link.pathname))
.map(link => { link.hash = ''; return url.parse(link.href) });
browser.wait(this.options.maxWait, () => { const headers = this.getHeaders(browser);
this.timer('browser.wait end url: ' + pageUrl.href);
const js = this.getJs(browser); // Validate content type
const contentType = headers.hasOwnProperty('content-type') ? headers['content-type'].shift() : null;
this.wappalyzer.analyze(pageUrl, { if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
headers, this.wappalyzer.log('Skipping; url: ' + pageUrl.href + '; content type: ' + contentType, 'driver');
html,
scripts,
js
});
return resolve(links); this.analyzedPageUrls.splice(this.analyzedPageUrls.indexOf(pageUrl.href), 1);
});
}); return false;
}); }
});
// Validate document
if ( !browser.document || !browser.document.documentElement ) {
this.wappalyzer.log('No HTML document; url: ' + pageUrl.href, 'driver', 'error');
return false;
}
return true;
} }
getHeaders(browser) { getHeaders(browser) {
const headers = {}; const headers = {};
browser.resources['0'].response.headers._headers.forEach(header => { const resource = browser.resources.length ? browser.resources.filter(resource => resource.response).shift() : null;
if ( !headers[header[0]] ){
headers[header[0]] = [];
}
headers[header[0]].push(header[1]); if ( resource ) {
}); resource.response.headers._headers.forEach(header => {
if ( !headers[header[0]] ){
headers[header[0]] = [];
}
headers[header[0]].push(header[1]);
});
}
return headers; return headers;
} }
@ -244,6 +257,8 @@ class Driver {
} }
crawl(pageUrl, index = 1, depth = 1) { crawl(pageUrl, index = 1, depth = 1) {
pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;
return new Promise(resolve => { return new Promise(resolve => {
this.fetch(pageUrl, index, depth) this.fetch(pageUrl, index, depth)
.then(links => { .then(links => {
@ -267,14 +282,14 @@ class Driver {
return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve(); return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve();
} }
timer(step) { timer(message, scope) {
const time = new Date().getTime(); const time = new Date().getTime();
const sinceStart = ( Math.round(( time - this.time.start ) / 10) / 100) + 's'; const sinceStart = ( Math.round(( time - this.time.start ) / 10) / 100) + 's';
const sinceLast = ( Math.round(( time - this.time.last ) / 10) / 100) + 's'; const sinceLast = ( Math.round(( time - scope.last ) / 10) / 100) + 's';
this.wappalyzer.log('[' + step + '] Time lapsed: ' + sinceLast + ' / ' + sinceStart, 'driver'); this.wappalyzer.log('[timer] ' + message + '; lapsed: ' + sinceLast + ' / ' + sinceStart, 'driver');
this.time.last = time; scope.last = time;
} }
}; };

@ -2,7 +2,7 @@
"name": "wappalyzer", "name": "wappalyzer",
"description": "Uncovers the technologies used on websites", "description": "Uncovers the technologies used on websites",
"homepage": "https://github.com/AliasIO/Wappalyzer", "homepage": "https://github.com/AliasIO/Wappalyzer",
"version": "5.4.2", "version": "5.4.3",
"author": "Elbert Alias", "author": "Elbert Alias",
"license": "GPL-3.0", "license": "GPL-3.0",
"repository": { "repository": {

@ -4,7 +4,7 @@
"author": "Elbert Alias", "author": "Elbert Alias",
"homepage_url": "https://www.wappalyzer.com", "homepage_url": "https://www.wappalyzer.com",
"description": "Identify web technologies", "description": "Identify web technologies",
"version": "5.4.2", "version": "5.4.3",
"default_locale": "en", "default_locale": "en",
"manifest_version": 2, "manifest_version": 2,
"icons": { "icons": {

Loading…
Cancel
Save