|
|
@ -1,4 +1,4 @@
|
|
|
|
const os = require('os')
|
|
|
|
// const os = require('os')
|
|
|
|
const fs = require('fs')
|
|
|
|
const fs = require('fs')
|
|
|
|
const dns = require('dns').promises
|
|
|
|
const dns = require('dns').promises
|
|
|
|
const path = require('path')
|
|
|
|
const path = require('path')
|
|
|
@ -22,10 +22,6 @@ const chromiumArgs = [
|
|
|
|
`--user-data-dir=${CHROMIUM_DATA_DIR || '/tmp/chromium'}`,
|
|
|
|
`--user-data-dir=${CHROMIUM_DATA_DIR || '/tmp/chromium'}`,
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
if (os.arch() === 'arm64') {
|
|
|
|
|
|
|
|
chromiumArgs.push('--single-process')
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/
|
|
|
|
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/
|
|
|
|
|
|
|
|
|
|
|
|
const categories = JSON.parse(
|
|
|
|
const categories = JSON.parse(
|
|
|
@ -400,6 +396,8 @@ class Site {
|
|
|
|
this.cache = {}
|
|
|
|
this.cache = {}
|
|
|
|
|
|
|
|
|
|
|
|
this.probed = false
|
|
|
|
this.probed = false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.destroyed = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
log(message, source = 'driver', type = 'log') {
|
|
|
|
log(message, source = 'driver', type = 'log') {
|
|
|
@ -425,7 +423,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
emit(event, params) {
|
|
|
|
emit(event, params) {
|
|
|
|
if (this.listeners[event]) {
|
|
|
|
if (this.listeners[event]) {
|
|
|
|
return Promise.all(
|
|
|
|
return Promise.allSettled(
|
|
|
|
this.listeners[event].map((listener) => listener(params))
|
|
|
|
this.listeners[event].map((listener) => listener(params))
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -470,12 +468,16 @@ class Site {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async goto(url) {
|
|
|
|
async goto(url) {
|
|
|
|
|
|
|
|
if (this.destroyed) {
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
if (this.analyzedUrls[url.href]) {
|
|
|
|
if (this.analyzedUrls[url.href]) {
|
|
|
|
return []
|
|
|
|
return []
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Navigate to ${url}`, 'page')
|
|
|
|
this.log(`Navigate to ${url}`)
|
|
|
|
|
|
|
|
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
status: 0,
|
|
|
|
status: 0,
|
|
|
@ -493,7 +495,13 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
page = await this.browser.newPage()
|
|
|
|
page = await this.browser.newPage()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!page || page.isClosed()) {
|
|
|
|
|
|
|
|
throw new Error('Page did not open')
|
|
|
|
|
|
|
|
}
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
|
|
|
|
await this.initDriver()
|
|
|
|
await this.initDriver()
|
|
|
@ -509,9 +517,15 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
await page.setRequestInterception(true)
|
|
|
|
await page.setRequestInterception(true)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
await page.setUserAgent(this.options.userAgent)
|
|
|
|
|
|
|
|
|
|
|
|
page.on('dialog', (dialog) => dialog.dismiss())
|
|
|
|
page.on('dialog', (dialog) => dialog.dismiss())
|
|
|
|
|
|
|
|
|
|
|
|
page.on('error', (error) => this.error(error))
|
|
|
|
page.on('error', (error) => {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
let responseReceived = false
|
|
|
|
let responseReceived = false
|
|
|
|
|
|
|
|
|
|
|
@ -523,6 +537,8 @@ class Site {
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
;({ hostname } = new URL(request.url()))
|
|
|
|
;({ hostname } = new URL(request.url()))
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
request.abort('blockedbyclient')
|
|
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -563,11 +579,17 @@ class Site {
|
|
|
|
request.continue({ headers })
|
|
|
|
request.continue({ headers })
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
this.error(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
page.on('response', async (response) => {
|
|
|
|
page.on('response', async (response) => {
|
|
|
|
|
|
|
|
if (this.destroyed || !page || page.__closed || page.isClosed()) {
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
if (
|
|
|
|
if (
|
|
|
|
response.status() < 300 &&
|
|
|
|
response.status() < 300 &&
|
|
|
@ -578,7 +600,15 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(response.url(), analyze({ scripts }))
|
|
|
|
await this.onDetect(response.url(), analyze({ scripts }))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
if (error.constructor.name !== 'ProtocolError') {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
if (response.url() === url.href) {
|
|
|
|
if (response.url() === url.href) {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
status: response.status(),
|
|
|
|
status: response.status(),
|
|
|
@ -625,26 +655,21 @@ class Site {
|
|
|
|
await this.emit('response', { page, response, headers, certIssuer })
|
|
|
|
await this.emit('response', { page, response, headers, certIssuer })
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
this.error(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
await page.setUserAgent(this.options.userAgent)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
await this.promiseTimeout(page.goto(url.href))
|
|
|
|
await page.goto(url.href)
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
|
|
|
error.constructor.name !== 'TimeoutError' &&
|
|
|
|
|
|
|
|
error.code !== 'PROMISE_TIMEOUT_ERROR'
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
throw error
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (page.url() === 'about:blank') {
|
|
|
|
if (page.url() === 'about:blank') {
|
|
|
|
throw new Error('The website failed to load')
|
|
|
|
const error = new Error(`The page failed to load (${url})`)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
error.code = 'WAPPALYZER_PAGE_EMPTY'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
throw error
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!this.options.noScripts) {
|
|
|
|
if (!this.options.noScripts) {
|
|
|
@ -665,6 +690,8 @@ class Site {
|
|
|
|
{}
|
|
|
|
{}
|
|
|
|
)
|
|
|
|
)
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
this.error(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -906,18 +933,26 @@ class Site {
|
|
|
|
...this.cache[url.href],
|
|
|
|
...this.cache[url.href],
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
page.__closed = true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
await page.close()
|
|
|
|
await page.close()
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
// Continue
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return reducedLinks
|
|
|
|
return reducedLinks
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
page.__closed = true
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
await page.close()
|
|
|
|
await page.close()
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
this.log(`Page closed (${url})`)
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
this.log(error)
|
|
|
|
// Continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let hostname = url
|
|
|
|
let hostname = url
|
|
|
@ -928,43 +963,35 @@ class Site {
|
|
|
|
// Continue
|
|
|
|
// Continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
|
|
|
error.constructor.name === 'TimeoutError' ||
|
|
|
|
|
|
|
|
error.code === 'PROMISE_TIMEOUT_ERROR'
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
const newError = new Error(
|
|
|
|
|
|
|
|
`The website took too long to respond: ${
|
|
|
|
|
|
|
|
error.message || error
|
|
|
|
|
|
|
|
} at ${hostname}`
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
newError.code = 'WAPPALYZER_TIMEOUT_ERROR'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
throw newError
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) {
|
|
|
|
if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) {
|
|
|
|
const newError = new Error(
|
|
|
|
const newError = new Error(`Hostname could not be resolved (${url})`)
|
|
|
|
`Hostname could not be resolved at ${hostname}`
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
newError.code = 'WAPPALYZER_DNS_ERROR'
|
|
|
|
newError.code = 'WAPPALYZER_DNS_ERROR'
|
|
|
|
|
|
|
|
|
|
|
|
throw newError
|
|
|
|
throw newError
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
|
|
|
error.constructor.name === 'TimeoutError' ||
|
|
|
|
|
|
|
|
error.code === 'PROMISE_TIMEOUT_ERROR'
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
error.code = 'WAPPALYZER_TIMEOUT_ERROR'
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
throw error
|
|
|
|
throw error
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async analyze(url = this.originalUrl, index = 1, depth = 1) {
|
|
|
|
async analyze(url = this.originalUrl, index = 1, depth = 1) {
|
|
|
|
try {
|
|
|
|
|
|
|
|
if (this.options.recursive) {
|
|
|
|
if (this.options.recursive) {
|
|
|
|
await sleep(this.options.delay * index)
|
|
|
|
await sleep(this.options.delay * index)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
await Promise.all([
|
|
|
|
await Promise.allSettled([
|
|
|
|
(async () => {
|
|
|
|
(async () => {
|
|
|
|
|
|
|
|
try {
|
|
|
|
const links = ((await this.goto(url)) || []).filter(
|
|
|
|
const links = ((await this.goto(url)) || []).filter(
|
|
|
|
({ href }) => !this.analyzedUrls[href]
|
|
|
|
({ href }) => !this.analyzedUrls[href]
|
|
|
|
)
|
|
|
|
)
|
|
|
@ -983,6 +1010,16 @@ class Site {
|
|
|
|
depth + 1
|
|
|
|
depth + 1
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
|
|
|
|
status: this.analyzedUrls[url.href]?.status || 0,
|
|
|
|
|
|
|
|
error: error.message || error.toString(),
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
}
|
|
|
|
})(),
|
|
|
|
})(),
|
|
|
|
(async () => {
|
|
|
|
(async () => {
|
|
|
|
if (this.options.probe && !this.probed) {
|
|
|
|
if (this.options.probe && !this.probed) {
|
|
|
@ -992,14 +1029,6 @@ class Site {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})(),
|
|
|
|
})(),
|
|
|
|
])
|
|
|
|
])
|
|
|
|
} catch (error) {
|
|
|
|
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
|
|
|
|
status: this.analyzedUrls[url.href]?.status || 0,
|
|
|
|
|
|
|
|
error: error.message || error.toString(),
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const patterns = this.options.extended
|
|
|
|
const patterns = this.options.extended
|
|
|
|
? this.detections.reduce(
|
|
|
|
? this.detections.reduce(
|
|
|
@ -1076,6 +1105,8 @@ class Site {
|
|
|
|
return this.promiseTimeout(
|
|
|
|
return this.promiseTimeout(
|
|
|
|
func(hostname).catch((error) => {
|
|
|
|
func(hostname).catch((error) => {
|
|
|
|
if (error.code !== 'ENODATA') {
|
|
|
|
if (error.code !== 'ENODATA') {
|
|
|
|
|
|
|
|
error.message += ` (${url})`
|
|
|
|
|
|
|
|
|
|
|
|
this.error(error)
|
|
|
|
this.error(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -1089,7 +1120,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
const domain = url.hostname.replace(/^www\./, '')
|
|
|
|
const domain = url.hostname.replace(/^www\./, '')
|
|
|
|
|
|
|
|
|
|
|
|
await Promise.all([
|
|
|
|
await Promise.allSettled([
|
|
|
|
// Static files
|
|
|
|
// Static files
|
|
|
|
...Object.keys(files).map(async (file, index) => {
|
|
|
|
...Object.keys(files).map(async (file, index) => {
|
|
|
|
const path = files[file]
|
|
|
|
const path = files[file]
|
|
|
@ -1099,7 +1130,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
const body = await get(new URL(path, url.href), {
|
|
|
|
const body = await get(new URL(path, url.href), {
|
|
|
|
userAgent: this.options.userAgent,
|
|
|
|
userAgent: this.options.userAgent,
|
|
|
|
timeout: Math.min(this.options.maxWait, 3000),
|
|
|
|
timeout: Math.min(this.options.maxWait, 1000),
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Probe ok (${path})`)
|
|
|
|
this.log(`Probe ok (${path})`)
|
|
|
@ -1156,7 +1187,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
const batched = links.splice(0, this.options.batchSize)
|
|
|
|
const batched = links.splice(0, this.options.batchSize)
|
|
|
|
|
|
|
|
|
|
|
|
await Promise.all(
|
|
|
|
await Promise.allSettled(
|
|
|
|
batched.map((link, index) => this.analyze(link, index, depth))
|
|
|
|
batched.map((link, index) => this.analyze(link, index, depth))
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
@ -1189,7 +1220,7 @@ class Site {
|
|
|
|
),
|
|
|
|
),
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
await Promise.all(
|
|
|
|
await Promise.allSettled(
|
|
|
|
requires.map(async ({ name, categoryId, technologies }) => {
|
|
|
|
requires.map(async ({ name, categoryId, technologies }) => {
|
|
|
|
const id = categoryId
|
|
|
|
const id = categoryId
|
|
|
|
? `category:${categoryId}`
|
|
|
|
? `category:${categoryId}`
|
|
|
@ -1242,9 +1273,11 @@ class Site {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async destroy() {
|
|
|
|
async destroy() {
|
|
|
|
await Promise.all(
|
|
|
|
await Promise.allSettled(
|
|
|
|
this.pages.map(async (page) => {
|
|
|
|
this.pages.map(async (page) => {
|
|
|
|
if (page) {
|
|
|
|
if (page) {
|
|
|
|
|
|
|
|
page.__closed = true
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
await page.close()
|
|
|
|
await page.close()
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
@ -1254,6 +1287,8 @@ class Site {
|
|
|
|
})
|
|
|
|
})
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
this.destroyed = true
|
|
|
|
|
|
|
|
|
|
|
|
this.log('Site closed')
|
|
|
|
this.log('Site closed')
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|