|
|
@ -16,6 +16,8 @@ const chromiumArgs = [
|
|
|
|
'--no-sandbox',
|
|
|
|
'--no-sandbox',
|
|
|
|
'--no-zygote',
|
|
|
|
'--no-zygote',
|
|
|
|
'--disable-gpu',
|
|
|
|
'--disable-gpu',
|
|
|
|
|
|
|
|
'--disable-software-rasterizer',
|
|
|
|
|
|
|
|
'--disable-features=AudioServiceOutOfProcess',
|
|
|
|
'--ignore-certificate-errors',
|
|
|
|
'--ignore-certificate-errors',
|
|
|
|
'--allow-running-insecure-content',
|
|
|
|
'--allow-running-insecure-content',
|
|
|
|
'--disable-web-security',
|
|
|
|
'--disable-web-security',
|
|
|
@ -400,6 +402,8 @@ class Site {
|
|
|
|
this.cache = {}
|
|
|
|
this.cache = {}
|
|
|
|
|
|
|
|
|
|
|
|
this.probed = false
|
|
|
|
this.probed = false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.destroyed = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
log(message, source = 'driver', type = 'log') {
|
|
|
|
log(message, source = 'driver', type = 'log') {
|
|
|
@ -470,6 +474,10 @@ class Site {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async goto(url) {
|
|
|
|
async goto(url) {
|
|
|
|
|
|
|
|
if (this.destroyed) {
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
if (this.analyzedUrls[url.href]) {
|
|
|
|
if (this.analyzedUrls[url.href]) {
|
|
|
|
return []
|
|
|
|
return []
|
|
|
@ -568,6 +576,10 @@ class Site {
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
page.on('response', async (response) => {
|
|
|
|
page.on('response', async (response) => {
|
|
|
|
|
|
|
|
if (this.destroyed || !page || page.isClosed()) {
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
if (
|
|
|
|
if (
|
|
|
|
response.status() < 300 &&
|
|
|
|
response.status() < 300 &&
|
|
|
@ -578,7 +590,11 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(response.url(), analyze({ scripts }))
|
|
|
|
await this.onDetect(response.url(), analyze({ scripts }))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
if (response.url() === url.href) {
|
|
|
|
if (response.url() === url.href) {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
status: response.status(),
|
|
|
|
status: response.status(),
|
|
|
@ -644,7 +660,7 @@ class Site {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (page.url() === 'about:blank') {
|
|
|
|
if (page.url() === 'about:blank') {
|
|
|
|
throw new Error('The website failed to load')
|
|
|
|
throw new Error(`The page failed to load (${url.href})`)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!this.options.noScripts) {
|
|
|
|
if (!this.options.noScripts) {
|
|
|
@ -906,7 +922,11 @@ class Site {
|
|
|
|
...this.cache[url.href],
|
|
|
|
...this.cache[url.href],
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
await page.close()
|
|
|
|
await page.close()
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
// Continue
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
|
|
|
|
|
|
|
@ -917,7 +937,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
this.log(error)
|
|
|
|
// Continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let hostname = url
|
|
|
|
let hostname = url
|
|
|
@ -1254,6 +1274,8 @@ class Site {
|
|
|
|
})
|
|
|
|
})
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.destroyed = true
|
|
|
|
|
|
|
|
|
|
|
|
this.log('Site closed')
|
|
|
|
this.log('Site closed')
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|