Add timeout to evaluateHandle calls

main
Elbert Alias 4 years ago
parent 94c275b195
commit 892fe1ac14

@ -195,6 +195,14 @@ class Site {
} }
} }
timeout() {
return new Promise(() =>
setTimeout(() => {
throw new Error('The website took too long to respond')
}, this.options.maxWait)
)
}
async goto(url) { async goto(url) {
// Return when the URL is a duplicate or maxUrls has been reached // Return when the URL is a duplicate or maxUrls has been reached
if ( if (
@ -292,204 +300,218 @@ class Site {
try { try {
await Promise.race([ await Promise.race([
page.goto(url.href, { waitUntil: 'domcontentloaded' }), this.timeout(),
new Promise((resolve, reject) => page.goto(url.href, { waitUntil: 'domcontentloaded' })
setTimeout(
() => reject(new Error('The website took too long to respond')),
this.options.maxWait
)
)
]) ])
} catch (error) {
this.error(error)
}
await sleep(1000)
// Links
const links = await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('a')).map(
({ hash, hostname, href, pathname, protocol, rel }) => ({
hash,
hostname,
href,
pathname,
protocol,
rel
})
)
)
).jsonValue()
// Script tags
const scripts = await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('script'))
.map(({ src }) => src)
.filter((src) => src)
)
).jsonValue()
// Meta tags await sleep(1000)
const meta = await (
await page.evaluateHandle(() => // Links
Array.from(document.querySelectorAll('meta')).reduce((metas, meta) => { const links = await Promise.race([
const key = meta.getAttribute('name') || meta.getAttribute('property') this.timeout(),
await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('a')).map(
({ hash, hostname, href, pathname, protocol, rel }) => ({
hash,
hostname,
href,
pathname,
protocol,
rel
})
)
)
).jsonValue()
])
if (key) { // Script tags
metas[key.toLowerCase()] = [meta.getAttribute('content')] const scripts = await Promise.race([
} this.timeout(),
await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('script'))
.map(({ src }) => src)
.filter((src) => src)
)
).jsonValue()
])
return metas // Meta tags
}, {}) const meta = await Promise.race([
) this.timeout(),
).jsonValue() await (
await page.evaluateHandle(() =>
// JavaScript Array.from(document.querySelectorAll('meta')).reduce(
const js = await page.evaluate( (metas, meta) => {
(technologies) => { const key =
return technologies.reduce((technologies, { name, chains }) => { meta.getAttribute('name') || meta.getAttribute('property')
chains.forEach((chain) => {
const value = chain if (key) {
.split('.') metas[key.toLowerCase()] = [meta.getAttribute('content')]
.reduce( }
(value, method) =>
value && value.hasOwnProperty(method) return metas
? value[method] },
: undefined, {}
window )
) )
).jsonValue()
])
if (typeof value !== 'undefined') { // JavaScript
technologies.push({ const js = await Promise.race([
name, this.timeout(),
chain, await page.evaluate(
value: (technologies) => {
typeof value === 'string' || typeof value === 'number' return technologies.reduce((technologies, { name, chains }) => {
? value chains.forEach((chain) => {
: !!value const value = chain
.split('.')
.reduce(
(value, method) =>
value && value.hasOwnProperty(method)
? value[method]
: undefined,
window
)
if (typeof value !== 'undefined') {
technologies.push({
name,
chain,
value:
typeof value === 'string' || typeof value === 'number'
? value
: !!value
})
}
}) })
}
})
return technologies return technologies
}, []) }, [])
}, },
Wappalyzer.technologies Wappalyzer.technologies
.filter(({ js }) => Object.keys(js).length) .filter(({ js }) => Object.keys(js).length)
.map(({ name, js }) => ({ name, chains: Object.keys(js) })) .map(({ name, js }) => ({ name, chains: Object.keys(js) }))
) )
])
// Cookies
const cookies = (await page.cookies()).reduce(
(cookies, { name, value }) => ({
...cookies,
[name]: [value]
}),
{}
)
// HTML
let html = await page.content()
if (this.options.htmlMaxCols && this.options.htmlMaxRows) { // Cookies
const batches = [] const cookies = (await page.cookies()).reduce(
const rows = html.length / this.options.htmlMaxCols (cookies, { name, value }) => ({
...cookies,
[name]: [value]
}),
{}
)
for (let i = 0; i < rows; i += 1) { // HTML
if ( let html = await page.content()
i < this.options.htmlMaxRows / 2 ||
i > rows - this.options.htmlMaxRows / 2 if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
) { const batches = []
batches.push( const rows = html.length / this.options.htmlMaxCols
html.slice(
i * this.options.htmlMaxCols, for (let i = 0; i < rows; i += 1) {
(i + 1) * this.options.htmlMaxCols if (
i < this.options.htmlMaxRows / 2 ||
i > rows - this.options.htmlMaxRows / 2
) {
batches.push(
html.slice(
i * this.options.htmlMaxCols,
(i + 1) * this.options.htmlMaxCols
)
) )
) }
} }
}
html = batches.join('\n') html = batches.join('\n')
} }
// Validate response // Validate response
if (!this.analyzedUrls[url.href].status) { if (!this.analyzedUrls[url.href].status) {
await page.close() await page.close()
this.log('Page closed') this.log('Page closed')
throw new Error('No response from server') throw new Error('No response from server')
} }
if (!this.language) { if (!this.language) {
this.language = await ( this.language = await Promise.race([
await page.evaluateHandle( this.timeout(),
() => await (
document.documentElement.getAttribute('lang') || await page.evaluateHandle(
document.documentElement.getAttribute('xml:lang') () =>
) document.documentElement.getAttribute('lang') ||
).jsonValue() document.documentElement.getAttribute('xml:lang')
} )
).jsonValue()
])
}
if (!this.language) { if (!this.language) {
try { try {
const [attrs] = languageDetect.detect( const [attrs] = languageDetect.detect(
html.replace(/<\/?[^>]+(>|$)/gs, ' '), html.replace(/<\/?[^>]+(>|$)/gs, ' '),
1 1
) )
if (attrs) { if (attrs) {
;[this.language] = attrs ;[this.language] = attrs
}
} catch (error) {
this.error(error)
} }
} catch (error) {
this.error(error)
} }
}
this.onDetect(analyzeJs(js)) this.onDetect(analyzeJs(js))
this.onDetect( this.onDetect(
analyze({ analyze({
url, url,
cookies, cookies,
html, html,
scripts, scripts,
meta meta
}) })
) )
const reducedLinks = Array.prototype.reduce.call( const reducedLinks = Array.prototype.reduce.call(
links, links,
(results, link) => { (results, link) => {
if ( if (
results && results &&
Object.prototype.hasOwnProperty.call( Object.prototype.hasOwnProperty.call(
Object.getPrototypeOf(results), Object.getPrototypeOf(results),
'push' 'push'
) && ) &&
link.protocol && link.protocol &&
link.protocol.match(/https?:/) && link.protocol.match(/https?:/) &&
link.rel !== 'nofollow' && link.rel !== 'nofollow' &&
link.hostname === url.hostname && link.hostname === url.hostname &&
extensions.test(link.pathname) extensions.test(link.pathname)
) { ) {
results.push(new URL(link.href.split('#')[0])) results.push(new URL(link.href.split('#')[0]))
} }
return results return results
}, },
[] []
) )
await page.close() await page.close()
this.log('Page closed') this.log('Page closed')
this.emit('goto', url) this.emit('goto', url)
return reducedLinks return reducedLinks
} catch (error) {
this.error(error)
}
} }
async analyze(url = this.originalUrl, index = 1, depth = 1) { async analyze(url = this.originalUrl, index = 1, depth = 1) {

@ -13,7 +13,7 @@
"software" "software"
], ],
"homepage": "https://www.wappalyzer.com", "homepage": "https://www.wappalyzer.com",
"version": "6.1.1", "version": "6.2.0",
"author": "Wappalyzer", "author": "Wappalyzer",
"license": "MIT", "license": "MIT",
"repository": { "repository": {
@ -38,4 +38,4 @@
"languagedetect": "^2.0.0", "languagedetect": "^2.0.0",
"puppeteer": "^2.0.0" "puppeteer": "^2.0.0"
} }
} }

@ -4,7 +4,7 @@
"author": "Wappalyzer", "author": "Wappalyzer",
"homepage_url": "https://www.wappalyzer.com", "homepage_url": "https://www.wappalyzer.com",
"description": "Identify web technologies", "description": "Identify web technologies",
"version": "6.0.16", "version": "6.2.0",
"default_locale": "en", "default_locale": "en",
"manifest_version": 2, "manifest_version": 2,
"icons": { "icons": {

@ -13,7 +13,7 @@
"software" "software"
], ],
"homepage": "https://www.wappalyzer.com", "homepage": "https://www.wappalyzer.com",
"version": "6.1.1", "version": "6.2.0",
"author": "Wappalyzer", "author": "Wappalyzer",
"license": "MIT", "license": "MIT",
"repository": { "repository": {
@ -27,4 +27,4 @@
"files": [ "files": [
"wappalyzer.js" "wappalyzer.js"
] ]
} }
Loading…
Cancel
Save