|
|
@ -117,46 +117,53 @@ class Driver {
|
|
|
|
|
|
|
|
|
|
|
|
pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;
|
|
|
|
pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Validate response
|
|
|
|
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
|
|
|
|
if ( !browser.resources['0'] || !browser.resources['0'].response ) {
|
|
|
|
this.wappalyzer.log('No response from server', 'browser', 'error');
|
|
|
|
this.wappalyzer.log('No response from server', 'browser', 'error');
|
|
|
|
|
|
|
|
|
|
|
|
return resolve();
|
|
|
|
return resolve();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if ( !browser.document || !browser.document.documentElement || !browser.body ) {
|
|
|
|
const headers = this.getHeaders(browser);
|
|
|
|
this.wappalyzer.log('No HTML document at ' + pageUrl.href, 'driver', 'error');
|
|
|
|
|
|
|
|
|
|
|
|
// Validate content type
|
|
|
|
|
|
|
|
const contentType = headers.hasOwnProperty('content-type') ? headers['content-type'].shift() : null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
|
|
|
|
|
|
|
|
this.wappalyzer.log('Skipping ' + pageUrl.href + ' of content type ' + contentType, 'driver');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.analyzedPageUrls.splice(this.analyzedPageUrls.indexOf(pageUrl.href), 1);
|
|
|
|
|
|
|
|
|
|
|
|
return resolve();
|
|
|
|
return resolve();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
browser.wait(this.options.maxWait, () => {
|
|
|
|
// Validate document element
|
|
|
|
this.timer('browser.wait end url: ' + pageUrl.href);
|
|
|
|
if ( !browser.document || !browser.document.documentElement ) {
|
|
|
|
|
|
|
|
this.wappalyzer.log('No HTML document at ' + pageUrl.href, 'driver', 'error');
|
|
|
|
const headers = this.getHeaders(browser);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const contentType = headers.hasOwnProperty('content-type') ? headers['content-type'].shift() : null;
|
|
|
|
return resolve();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
|
|
|
|
const html = this.getHtml(browser);
|
|
|
|
this.wappalyzer.log('Skipping ' + pageUrl.href + ' of content type ' + contentType, 'driver');
|
|
|
|
const scripts = this.getScripts(browser);
|
|
|
|
|
|
|
|
|
|
|
|
this.analyzedPageUrls.splice(this.analyzedPageUrls.indexOf(pageUrl.href), 1);
|
|
|
|
const links = Array.from(browser.document.getElementsByTagName('a'))
|
|
|
|
|
|
|
|
.filter(link => link.hostname === this.origPageUrl.hostname)
|
|
|
|
|
|
|
|
.filter(link => extensions.test(link.pathname))
|
|
|
|
|
|
|
|
.map(link => { link.hash = ''; return url.parse(link.href) });
|
|
|
|
|
|
|
|
|
|
|
|
return resolve();
|
|
|
|
browser.wait(this.options.maxWait, () => {
|
|
|
|
}
|
|
|
|
this.timer('browser.wait end url: ' + pageUrl.href);
|
|
|
|
|
|
|
|
|
|
|
|
const html = this.getHtml(browser);
|
|
|
|
const js = this.getJs(browser);
|
|
|
|
const scripts = this.getScripts(browser);
|
|
|
|
|
|
|
|
const js = this.getJs(browser);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.wappalyzer.analyze(pageUrl, {
|
|
|
|
this.wappalyzer.analyze(pageUrl, {
|
|
|
|
headers,
|
|
|
|
headers,
|
|
|
|
html,
|
|
|
|
html,
|
|
|
|
js,
|
|
|
|
scripts,
|
|
|
|
scripts
|
|
|
|
js
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
const links = browser.body.getElementsByTagName('a');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return resolve(links);
|
|
|
|
return resolve(links);
|
|
|
|
});
|
|
|
|
});
|
|
|
|
});
|
|
|
|
});
|
|
|
@ -183,7 +190,11 @@ class Driver {
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
html = browser.html();
|
|
|
|
html = browser.html();
|
|
|
|
} catch ( e ) {
|
|
|
|
|
|
|
|
|
|
|
|
if ( html.length > 50000 ) {
|
|
|
|
|
|
|
|
html = html.substring(0, 25000) + html.substring(html.length - 25000, html.length);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch ( error ) {
|
|
|
|
this.wappalyzer.log(error.message, 'browser', 'error');
|
|
|
|
this.wappalyzer.log(error.message, 'browser', 'error');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -237,11 +248,6 @@ class Driver {
|
|
|
|
this.fetch(pageUrl, index, depth)
|
|
|
|
this.fetch(pageUrl, index, depth)
|
|
|
|
.then(links => {
|
|
|
|
.then(links => {
|
|
|
|
if ( links && Boolean(this.options.recursive) && depth < this.options.maxDepth ) {
|
|
|
|
if ( links && Boolean(this.options.recursive) && depth < this.options.maxDepth ) {
|
|
|
|
links = Array.from(links)
|
|
|
|
|
|
|
|
.filter(link => link.hostname === this.origPageUrl.hostname)
|
|
|
|
|
|
|
|
.filter(link => extensions.test(link.pathname))
|
|
|
|
|
|
|
|
.map(link => { link.hash = ''; return link });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return Promise.all(links.map((link, index) => this.crawl(link, index + 1, depth + 1)));
|
|
|
|
return Promise.all(links.map((link, index) => this.crawl(link, index + 1, depth + 1)));
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
return Promise.resolve();
|
|
|
|
return Promise.resolve();
|
|
|
|