Performance improvements

main
Elbert Alias 8 years ago
parent 1c38da6e59
commit 3f42859e49

@ -117,46 +117,53 @@ class Driver {
pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname; pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;
// Validate response
if ( !browser.resources['0'] || !browser.resources['0'].response ) { if ( !browser.resources['0'] || !browser.resources['0'].response ) {
this.wappalyzer.log('No response from server', 'browser', 'error'); this.wappalyzer.log('No response from server', 'browser', 'error');
return resolve(); return resolve();
} }
if ( !browser.document || !browser.document.documentElement || !browser.body ) { const headers = this.getHeaders(browser);
this.wappalyzer.log('No HTML document at ' + pageUrl.href, 'driver', 'error');
// Validate content type
const contentType = headers.hasOwnProperty('content-type') ? headers['content-type'].shift() : null;
if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
this.wappalyzer.log('Skipping ' + pageUrl.href + ' of content type ' + contentType, 'driver');
this.analyzedPageUrls.splice(this.analyzedPageUrls.indexOf(pageUrl.href), 1);
return resolve(); return resolve();
} }
browser.wait(this.options.maxWait, () => { // Validate document element
this.timer('browser.wait end url: ' + pageUrl.href); if ( !browser.document || !browser.document.documentElement ) {
this.wappalyzer.log('No HTML document at ' + pageUrl.href, 'driver', 'error');
const headers = this.getHeaders(browser);
const contentType = headers.hasOwnProperty('content-type') ? headers['content-type'].shift() : null; return resolve();
}
if ( !contentType || !/\btext\/html\b/.test(contentType) ) { const html = this.getHtml(browser);
this.wappalyzer.log('Skipping ' + pageUrl.href + ' of content type ' + contentType, 'driver'); const scripts = this.getScripts(browser);
this.analyzedPageUrls.splice(this.analyzedPageUrls.indexOf(pageUrl.href), 1); const links = Array.from(browser.document.getElementsByTagName('a'))
.filter(link => link.hostname === this.origPageUrl.hostname)
.filter(link => extensions.test(link.pathname))
.map(link => { link.hash = ''; return url.parse(link.href) });
return resolve(); browser.wait(this.options.maxWait, () => {
} this.timer('browser.wait end url: ' + pageUrl.href);
const html = this.getHtml(browser); const js = this.getJs(browser);
const scripts = this.getScripts(browser);
const js = this.getJs(browser);
this.wappalyzer.analyze(pageUrl, { this.wappalyzer.analyze(pageUrl, {
headers, headers,
html, html,
js, scripts,
scripts js
}); });
const links = browser.body.getElementsByTagName('a');
return resolve(links); return resolve(links);
}); });
}); });
@ -183,7 +190,11 @@ class Driver {
try { try {
html = browser.html(); html = browser.html();
} catch ( e ) {
if ( html.length > 50000 ) {
html = html.substring(0, 25000) + html.substring(html.length - 25000, html.length);
}
} catch ( error ) {
this.wappalyzer.log(error.message, 'browser', 'error'); this.wappalyzer.log(error.message, 'browser', 'error');
} }
@ -237,11 +248,6 @@ class Driver {
this.fetch(pageUrl, index, depth) this.fetch(pageUrl, index, depth)
.then(links => { .then(links => {
if ( links && Boolean(this.options.recursive) && depth < this.options.maxDepth ) { if ( links && Boolean(this.options.recursive) && depth < this.options.maxDepth ) {
links = Array.from(links)
.filter(link => link.hostname === this.origPageUrl.hostname)
.filter(link => extensions.test(link.pathname))
.map(link => { link.hash = ''; return link });
return Promise.all(links.map((link, index) => this.crawl(link, index + 1, depth + 1))); return Promise.all(links.map((link, index) => this.crawl(link, index + 1, depth + 1)));
} else { } else {
return Promise.resolve(); return Promise.resolve();