Merge branch 'wappalyzer:master' into technology/tangled-network

main
Dipak Parmar 3 years ago committed by GitHub
commit 9f8bbe3fd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -1,4 +1,4 @@
const os = require('os')
// const os = require('os')
const fs = require('fs')
const dns = require('dns').promises
const path = require('path')
@@ -22,10 +22,6 @@ const chromiumArgs = [
`--user-data-dir=${CHROMIUM_DATA_DIR || '/tmp/chromium'}`,
]
if (os.arch() === 'arm64') {
chromiumArgs.push('--single-process')
}
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/
const categories = JSON.parse(
@@ -400,6 +396,8 @@ class Site {
this.cache = {}
this.probed = false
this.destroyed = false
}
log(message, source = 'driver', type = 'log') {
@@ -425,7 +423,7 @@ class Site {
emit(event, params) {
if (this.listeners[event]) {
return Promise.all(
return Promise.allSettled(
this.listeners[event].map((listener) => listener(params))
)
}
@@ -470,12 +468,16 @@ class Site {
}
async goto(url) {
if (this.destroyed) {
return
}
// Return when the URL is a duplicate or maxUrls has been reached
if (this.analyzedUrls[url.href]) {
return []
}
this.log(`Navigate to ${url}`, 'page')
this.log(`Navigate to ${url}`)
this.analyzedUrls[url.href] = {
status: 0,
@@ -493,7 +495,13 @@ class Site {
try {
page = await this.browser.newPage()
if (!page || page.isClosed()) {
throw new Error('Page did not open')
}
} catch (error) {
error.message += ` (${url})`
this.error(error)
await this.initDriver()
@@ -509,9 +517,15 @@ class Site {
await page.setRequestInterception(true)
await page.setUserAgent(this.options.userAgent)
page.on('dialog', (dialog) => dialog.dismiss())
page.on('error', (error) => this.error(error))
page.on('error', (error) => {
error.message += ` (${url})`
this.error(error)
})
let responseReceived = false
@@ -523,6 +537,8 @@ class Site {
try {
;({ hostname } = new URL(request.url()))
} catch (error) {
request.abort('blockedbyclient')
return
}
@@ -563,11 +579,17 @@ class Site {
request.continue({ headers })
}
} catch (error) {
error.message += ` (${url})`
this.error(error)
}
})
page.on('response', async (response) => {
if (this.destroyed || !page || page.__closed || page.isClosed()) {
return
}
try {
if (
response.status() < 300 &&
@@ -578,7 +600,15 @@ class Site {
await this.onDetect(response.url(), analyze({ scripts }))
}
} catch (error) {
if (error.constructor.name !== 'ProtocolError') {
error.message += ` (${url})`
this.error(error)
}
}
try {
if (response.url() === url.href) {
this.analyzedUrls[url.href] = {
status: response.status(),
@@ -625,26 +655,21 @@ class Site {
await this.emit('response', { page, response, headers, certIssuer })
}
} catch (error) {
error.message += ` (${url})`
this.error(error)
}
})
await page.setUserAgent(this.options.userAgent)
try {
try {
await this.promiseTimeout(page.goto(url.href))
} catch (error) {
if (
error.constructor.name !== 'TimeoutError' &&
error.code !== 'PROMISE_TIMEOUT_ERROR'
) {
throw error
}
}
await page.goto(url.href)
if (page.url() === 'about:blank') {
throw new Error('The website failed to load')
const error = new Error(`The page failed to load (${url})`)
error.code = 'WAPPALYZER_PAGE_EMPTY'
throw error
}
if (!this.options.noScripts) {
@@ -665,6 +690,8 @@ class Site {
{}
)
} catch (error) {
error.message += ` (${url})`
this.error(error)
}
@@ -906,18 +933,26 @@ class Site {
...this.cache[url.href],
})
await page.close()
page.__closed = true
this.log(`Page closed (${url})`)
try {
await page.close()
this.log(`Page closed (${url})`)
} catch (error) {
// Continue
}
return reducedLinks
} catch (error) {
page.__closed = true
try {
await page.close()
this.log(`Page closed (${url})`)
} catch (error) {
this.log(error)
// Continue
}
let hostname = url
@@ -928,43 +963,35 @@ class Site {
// Continue
}
if (
error.constructor.name === 'TimeoutError' ||
error.code === 'PROMISE_TIMEOUT_ERROR'
) {
const newError = new Error(
`The website took too long to respond: ${
error.message || error
} at ${hostname}`
)
newError.code = 'WAPPALYZER_TIMEOUT_ERROR'
throw newError
}
if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) {
const newError = new Error(
`Hostname could not be resolved at ${hostname}`
)
const newError = new Error(`Hostname could not be resolved (${url})`)
newError.code = 'WAPPALYZER_DNS_ERROR'
throw newError
}
if (
error.constructor.name === 'TimeoutError' ||
error.code === 'PROMISE_TIMEOUT_ERROR'
) {
error.code = 'WAPPALYZER_TIMEOUT_ERROR'
}
error.message += ` (${url})`
throw error
}
}
async analyze(url = this.originalUrl, index = 1, depth = 1) {
try {
if (this.options.recursive) {
await sleep(this.options.delay * index)
}
if (this.options.recursive) {
await sleep(this.options.delay * index)
}
await Promise.all([
(async () => {
await Promise.allSettled([
(async () => {
try {
const links = ((await this.goto(url)) || []).filter(
({ href }) => !this.analyzedUrls[href]
)
@@ -983,23 +1010,25 @@ class Site {
depth + 1
)
}
})(),
(async () => {
if (this.options.probe && !this.probed) {
this.probed = true
await this.probe(url)
} catch (error) {
this.analyzedUrls[url.href] = {
status: this.analyzedUrls[url.href]?.status || 0,
error: error.message || error.toString(),
}
})(),
])
} catch (error) {
this.analyzedUrls[url.href] = {
status: this.analyzedUrls[url.href]?.status || 0,
error: error.message || error.toString(),
}
this.error(error)
}
error.message += ` (${url})`
this.error(error)
}
})(),
(async () => {
if (this.options.probe && !this.probed) {
this.probed = true
await this.probe(url)
}
})(),
])
const patterns = this.options.extended
? this.detections.reduce(
@@ -1076,6 +1105,8 @@ class Site {
return this.promiseTimeout(
func(hostname).catch((error) => {
if (error.code !== 'ENODATA') {
error.message += ` (${url})`
this.error(error)
}
@@ -1089,7 +1120,7 @@ class Site {
const domain = url.hostname.replace(/^www\./, '')
await Promise.all([
await Promise.allSettled([
// Static files
...Object.keys(files).map(async (file, index) => {
const path = files[file]
@@ -1099,7 +1130,7 @@ class Site {
const body = await get(new URL(path, url.href), {
userAgent: this.options.userAgent,
timeout: Math.min(this.options.maxWait, 3000),
timeout: Math.min(this.options.maxWait, 1000),
})
this.log(`Probe ok (${path})`)
@@ -1156,7 +1187,7 @@ class Site {
const batched = links.splice(0, this.options.batchSize)
await Promise.all(
await Promise.allSettled(
batched.map((link, index) => this.analyze(link, index, depth))
)
@@ -1189,7 +1220,7 @@ class Site {
),
]
await Promise.all(
await Promise.allSettled(
requires.map(async ({ name, categoryId, technologies }) => {
const id = categoryId
? `category:${categoryId}`
@@ -1242,9 +1273,11 @@ class Site {
}
async destroy() {
await Promise.all(
await Promise.allSettled(
this.pages.map(async (page) => {
if (page) {
page.__closed = true
try {
await page.close()
} catch (error) {
@@ -1254,6 +1287,8 @@ class Site {
})
)
this.destroyed = true
this.log('Site closed')
}
}

@@ -13,7 +13,7 @@
"software"
],
"homepage": "https://www.wappalyzer.com/",
"version": "6.10.27",
"version": "6.10.35",
"author": "Wappalyzer",
"license": "MIT",
"repository": {
@@ -38,7 +38,7 @@
"wappalyzer": "./cli.js"
},
"dependencies": {
"puppeteer": "^13.5.2"
"puppeteer": "~14.1.0"
},
"engines": {
"node": ">=14"

@@ -4,7 +4,7 @@
"author": "Wappalyzer",
"homepage_url": "https://www.wappalyzer.com/",
"description": "Identify web technologies",
"version": "6.10.27",
"version": "6.10.35",
"default_locale": "en",
"manifest_version": 2,
"icons": {

@@ -13,7 +13,7 @@
"software"
],
"homepage": "https://www.wappalyzer.com/",
"version": "6.10.27",
"version": "6.10.35",
"author": "Wappalyzer",
"license": "MIT",
"repository": {