|
|
|
@ -195,6 +195,14 @@ class Site {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
timeout() {
|
|
|
|
|
return new Promise(() =>
|
|
|
|
|
setTimeout(() => {
|
|
|
|
|
throw new Error('The website took too long to respond')
|
|
|
|
|
}, this.options.maxWait)
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async goto(url) {
|
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
|
if (
|
|
|
|
@ -292,22 +300,16 @@ class Site {
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
await Promise.race([
|
|
|
|
|
page.goto(url.href, { waitUntil: 'domcontentloaded' }),
|
|
|
|
|
new Promise((resolve, reject) =>
|
|
|
|
|
setTimeout(
|
|
|
|
|
() => reject(new Error('The website took too long to respond')),
|
|
|
|
|
this.options.maxWait
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
this.timeout(),
|
|
|
|
|
page.goto(url.href, { waitUntil: 'domcontentloaded' })
|
|
|
|
|
])
|
|
|
|
|
} catch (error) {
|
|
|
|
|
this.error(error)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
await sleep(1000)
|
|
|
|
|
|
|
|
|
|
// Links
|
|
|
|
|
const links = await (
|
|
|
|
|
const links = await Promise.race([
|
|
|
|
|
this.timeout(),
|
|
|
|
|
await (
|
|
|
|
|
await page.evaluateHandle(() =>
|
|
|
|
|
Array.from(document.getElementsByTagName('a')).map(
|
|
|
|
|
({ hash, hostname, href, pathname, protocol, rel }) => ({
|
|
|
|
@ -321,33 +323,46 @@ class Site {
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
).jsonValue()
|
|
|
|
|
])
|
|
|
|
|
|
|
|
|
|
// Script tags
|
|
|
|
|
const scripts = await (
|
|
|
|
|
const scripts = await Promise.race([
|
|
|
|
|
this.timeout(),
|
|
|
|
|
await (
|
|
|
|
|
await page.evaluateHandle(() =>
|
|
|
|
|
Array.from(document.getElementsByTagName('script'))
|
|
|
|
|
.map(({ src }) => src)
|
|
|
|
|
.filter((src) => src)
|
|
|
|
|
)
|
|
|
|
|
).jsonValue()
|
|
|
|
|
])
|
|
|
|
|
|
|
|
|
|
// Meta tags
|
|
|
|
|
const meta = await (
|
|
|
|
|
const meta = await Promise.race([
|
|
|
|
|
this.timeout(),
|
|
|
|
|
await (
|
|
|
|
|
await page.evaluateHandle(() =>
|
|
|
|
|
Array.from(document.querySelectorAll('meta')).reduce((metas, meta) => {
|
|
|
|
|
const key = meta.getAttribute('name') || meta.getAttribute('property')
|
|
|
|
|
Array.from(document.querySelectorAll('meta')).reduce(
|
|
|
|
|
(metas, meta) => {
|
|
|
|
|
const key =
|
|
|
|
|
meta.getAttribute('name') || meta.getAttribute('property')
|
|
|
|
|
|
|
|
|
|
if (key) {
|
|
|
|
|
metas[key.toLowerCase()] = [meta.getAttribute('content')]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return metas
|
|
|
|
|
}, {})
|
|
|
|
|
},
|
|
|
|
|
{}
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
).jsonValue()
|
|
|
|
|
])
|
|
|
|
|
|
|
|
|
|
// JavaScript
|
|
|
|
|
const js = await page.evaluate(
|
|
|
|
|
const js = await Promise.race([
|
|
|
|
|
this.timeout(),
|
|
|
|
|
await page.evaluate(
|
|
|
|
|
(technologies) => {
|
|
|
|
|
return technologies.reduce((technologies, { name, chains }) => {
|
|
|
|
|
chains.forEach((chain) => {
|
|
|
|
@ -380,6 +395,7 @@ class Site {
|
|
|
|
|
.filter(({ js }) => Object.keys(js).length)
|
|
|
|
|
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
|
|
|
|
|
)
|
|
|
|
|
])
|
|
|
|
|
|
|
|
|
|
// Cookies
|
|
|
|
|
const cookies = (await page.cookies()).reduce(
|
|
|
|
@ -424,13 +440,16 @@ class Site {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!this.language) {
|
|
|
|
|
this.language = await (
|
|
|
|
|
this.language = await Promise.race([
|
|
|
|
|
this.timeout(),
|
|
|
|
|
await (
|
|
|
|
|
await page.evaluateHandle(
|
|
|
|
|
() =>
|
|
|
|
|
document.documentElement.getAttribute('lang') ||
|
|
|
|
|
document.documentElement.getAttribute('xml:lang')
|
|
|
|
|
)
|
|
|
|
|
).jsonValue()
|
|
|
|
|
])
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!this.language) {
|
|
|
|
@ -490,6 +509,9 @@ class Site {
|
|
|
|
|
this.emit('goto', url)
|
|
|
|
|
|
|
|
|
|
return reducedLinks
|
|
|
|
|
} catch (error) {
|
|
|
|
|
this.error(error)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async analyze(url = this.originalUrl, index = 1, depth = 1) {
|
|
|
|
|