Add timeout to evaluateHandle calls

main
Elbert Alias 4 years ago
parent 94c275b195
commit 892fe1ac14

@ -195,6 +195,14 @@ class Site {
}
}
timeout() {
return new Promise(() =>
setTimeout(() => {
throw new Error('The website took too long to respond')
}, this.options.maxWait)
)
}
async goto(url) {
// Return when the URL is a duplicate or maxUrls has been reached
if (
@ -292,204 +300,218 @@ class Site {
try {
await Promise.race([
page.goto(url.href, { waitUntil: 'domcontentloaded' }),
new Promise((resolve, reject) =>
setTimeout(
() => reject(new Error('The website took too long to respond')),
this.options.maxWait
)
)
this.timeout(),
page.goto(url.href, { waitUntil: 'domcontentloaded' })
])
} catch (error) {
this.error(error)
}
await sleep(1000)
// Links
const links = await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('a')).map(
({ hash, hostname, href, pathname, protocol, rel }) => ({
hash,
hostname,
href,
pathname,
protocol,
rel
})
)
)
).jsonValue()
// Script tags
const scripts = await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('script'))
.map(({ src }) => src)
.filter((src) => src)
)
).jsonValue()
// Meta tags
const meta = await (
await page.evaluateHandle(() =>
Array.from(document.querySelectorAll('meta')).reduce((metas, meta) => {
const key = meta.getAttribute('name') || meta.getAttribute('property')
await sleep(1000)
// Links
// Hand Promise.race the still-pending evaluation promise. Awaiting it inside
// the array literal would resolve it before the race begins, so the timeout
// could never win when the page hangs.
const links = await Promise.race([
  this.timeout(),
  (async () =>
    (
      await page.evaluateHandle(() =>
        Array.from(document.getElementsByTagName('a')).map(
          ({ hash, hostname, href, pathname, protocol, rel }) => ({
            hash,
            hostname,
            href,
            pathname,
            protocol,
            rel
          })
        )
      )
    ).jsonValue())()
])
if (key) {
metas[key.toLowerCase()] = [meta.getAttribute('content')]
}
// Script tags
// Race the pending evaluation promise against the timeout. An `await` inside
// the array literal would finish the evaluation before Promise.race is even
// called, making the timeout ineffective.
const scripts = await Promise.race([
  this.timeout(),
  (async () =>
    (
      await page.evaluateHandle(() =>
        Array.from(document.getElementsByTagName('script'))
          .map(({ src }) => src)
          .filter((src) => src)
      )
    ).jsonValue())()
])
return metas
}, {})
)
).jsonValue()
// JavaScript
const js = await page.evaluate(
(technologies) => {
return technologies.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => {
const value = chain
.split('.')
.reduce(
(value, method) =>
value && value.hasOwnProperty(method)
? value[method]
: undefined,
window
)
// Meta tags
// Race the pending evaluation promise against the timeout. An `await` inside
// the array literal would finish the evaluation before Promise.race is even
// called, making the timeout ineffective.
const meta = await Promise.race([
  this.timeout(),
  (async () =>
    (
      await page.evaluateHandle(() =>
        Array.from(document.querySelectorAll('meta')).reduce(
          (metas, meta) => {
            // Meta tags are keyed by `name`, falling back to `property`
            const key =
              meta.getAttribute('name') || meta.getAttribute('property')

            if (key) {
              metas[key.toLowerCase()] = [meta.getAttribute('content')]
            }

            return metas
          },
          {}
        )
      )
    ).jsonValue())()
])
if (typeof value !== 'undefined') {
technologies.push({
name,
chain,
value:
typeof value === 'string' || typeof value === 'number'
? value
: !!value
// JavaScript
const js = await Promise.race([
this.timeout(),
await page.evaluate(
(technologies) => {
return technologies.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => {
const value = chain
.split('.')
.reduce(
(value, method) =>
value && value.hasOwnProperty(method)
? value[method]
: undefined,
window
)
if (typeof value !== 'undefined') {
technologies.push({
name,
chain,
value:
typeof value === 'string' || typeof value === 'number'
? value
: !!value
})
}
})
}
})
return technologies
}, [])
},
Wappalyzer.technologies
.filter(({ js }) => Object.keys(js).length)
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
)
// Cookies
const cookies = (await page.cookies()).reduce(
(cookies, { name, value }) => ({
...cookies,
[name]: [value]
}),
{}
)
// HTML
let html = await page.content()
return technologies
}, [])
},
Wappalyzer.technologies
.filter(({ js }) => Object.keys(js).length)
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
)
])
if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
const batches = []
const rows = html.length / this.options.htmlMaxCols
// Cookies
const cookies = (await page.cookies()).reduce(
(cookies, { name, value }) => ({
...cookies,
[name]: [value]
}),
{}
)
for (let i = 0; i < rows; i += 1) {
if (
i < this.options.htmlMaxRows / 2 ||
i > rows - this.options.htmlMaxRows / 2
) {
batches.push(
html.slice(
i * this.options.htmlMaxCols,
(i + 1) * this.options.htmlMaxCols
// HTML
let html = await page.content()
if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
const batches = []
const rows = html.length / this.options.htmlMaxCols
for (let i = 0; i < rows; i += 1) {
if (
i < this.options.htmlMaxRows / 2 ||
i > rows - this.options.htmlMaxRows / 2
) {
batches.push(
html.slice(
i * this.options.htmlMaxCols,
(i + 1) * this.options.htmlMaxCols
)
)
)
}
}
}
html = batches.join('\n')
}
html = batches.join('\n')
}
// Validate response
if (!this.analyzedUrls[url.href].status) {
await page.close()
// Validate response
if (!this.analyzedUrls[url.href].status) {
await page.close()
this.log('Page closed')
this.log('Page closed')
throw new Error('No response from server')
}
throw new Error('No response from server')
}
if (!this.language) {
this.language = await (
await page.evaluateHandle(
() =>
document.documentElement.getAttribute('lang') ||
document.documentElement.getAttribute('xml:lang')
)
).jsonValue()
}
// Read the document language from the <html> element when it is not known yet
if (!this.language) {
  // Race the pending evaluation promise against the timeout. An `await`
  // inside the array literal would finish the evaluation before Promise.race
  // is even called, making the timeout ineffective.
  this.language = await Promise.race([
    this.timeout(),
    (async () =>
      (
        await page.evaluateHandle(
          () =>
            document.documentElement.getAttribute('lang') ||
            document.documentElement.getAttribute('xml:lang')
        )
      ).jsonValue())()
  ])
}
if (!this.language) {
try {
const [attrs] = languageDetect.detect(
html.replace(/<\/?[^>]+(>|$)/gs, ' '),
1
)
if (!this.language) {
try {
const [attrs] = languageDetect.detect(
html.replace(/<\/?[^>]+(>|$)/gs, ' '),
1
)
if (attrs) {
;[this.language] = attrs
if (attrs) {
;[this.language] = attrs
}
} catch (error) {
this.error(error)
}
} catch (error) {
this.error(error)
}
}
this.onDetect(analyzeJs(js))
this.onDetect(analyzeJs(js))
this.onDetect(
analyze({
url,
cookies,
html,
scripts,
meta
})
)
this.onDetect(
analyze({
url,
cookies,
html,
scripts,
meta
})
)
const reducedLinks = Array.prototype.reduce.call(
links,
(results, link) => {
if (
results &&
Object.prototype.hasOwnProperty.call(
Object.getPrototypeOf(results),
'push'
) &&
link.protocol &&
link.protocol.match(/https?:/) &&
link.rel !== 'nofollow' &&
link.hostname === url.hostname &&
extensions.test(link.pathname)
) {
results.push(new URL(link.href.split('#')[0]))
}
const reducedLinks = Array.prototype.reduce.call(
links,
(results, link) => {
if (
results &&
Object.prototype.hasOwnProperty.call(
Object.getPrototypeOf(results),
'push'
) &&
link.protocol &&
link.protocol.match(/https?:/) &&
link.rel !== 'nofollow' &&
link.hostname === url.hostname &&
extensions.test(link.pathname)
) {
results.push(new URL(link.href.split('#')[0]))
}
return results
},
[]
)
return results
},
[]
)
await page.close()
await page.close()
this.log('Page closed')
this.log('Page closed')
this.emit('goto', url)
this.emit('goto', url)
return reducedLinks
return reducedLinks
} catch (error) {
this.error(error)
}
}
async analyze(url = this.originalUrl, index = 1, depth = 1) {

@ -13,7 +13,7 @@
"software"
],
"homepage": "https://www.wappalyzer.com",
"version": "6.1.1",
"version": "6.2.0",
"author": "Wappalyzer",
"license": "MIT",
"repository": {
@ -38,4 +38,4 @@
"languagedetect": "^2.0.0",
"puppeteer": "^2.0.0"
}
}
}

@ -4,7 +4,7 @@
"author": "Wappalyzer",
"homepage_url": "https://www.wappalyzer.com",
"description": "Identify web technologies",
"version": "6.0.16",
"version": "6.2.0",
"default_locale": "en",
"manifest_version": 2,
"icons": {

@ -13,7 +13,7 @@
"software"
],
"homepage": "https://www.wappalyzer.com",
"version": "6.1.1",
"version": "6.2.0",
"author": "Wappalyzer",
"license": "MIT",
"repository": {
@ -27,4 +27,4 @@
"files": [
"wappalyzer.js"
]
}
}