Merge branch 'wappalyzer:master' into technology/tangled-network

main
Dipak Parmar 2 years ago committed by GitHub
commit 9f8bbe3fd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -1,4 +1,4 @@
const os = require('os') // const os = require('os')
const fs = require('fs') const fs = require('fs')
const dns = require('dns').promises const dns = require('dns').promises
const path = require('path') const path = require('path')
@@ -22,10 +22,6 @@ const chromiumArgs = [
`--user-data-dir=${CHROMIUM_DATA_DIR || '/tmp/chromium'}`, `--user-data-dir=${CHROMIUM_DATA_DIR || '/tmp/chromium'}`,
] ]
if (os.arch() === 'arm64') {
chromiumArgs.push('--single-process')
}
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/
const categories = JSON.parse( const categories = JSON.parse(
@@ -400,6 +396,8 @@ class Site {
this.cache = {} this.cache = {}
this.probed = false this.probed = false
this.destroyed = false
} }
log(message, source = 'driver', type = 'log') { log(message, source = 'driver', type = 'log') {
@@ -425,7 +423,7 @@ class Site {
emit(event, params) { emit(event, params) {
if (this.listeners[event]) { if (this.listeners[event]) {
return Promise.all( return Promise.allSettled(
this.listeners[event].map((listener) => listener(params)) this.listeners[event].map((listener) => listener(params))
) )
} }
@@ -470,12 +468,16 @@ class Site {
} }
async goto(url) { async goto(url) {
if (this.destroyed) {
return
}
// Return when the URL is a duplicate or maxUrls has been reached // Return when the URL is a duplicate or maxUrls has been reached
if (this.analyzedUrls[url.href]) { if (this.analyzedUrls[url.href]) {
return [] return []
} }
this.log(`Navigate to ${url}`, 'page') this.log(`Navigate to ${url}`)
this.analyzedUrls[url.href] = { this.analyzedUrls[url.href] = {
status: 0, status: 0,
@@ -493,7 +495,13 @@ class Site {
try { try {
page = await this.browser.newPage() page = await this.browser.newPage()
if (!page || page.isClosed()) {
throw new Error('Page did not open')
}
} catch (error) { } catch (error) {
error.message += ` (${url})`
this.error(error) this.error(error)
await this.initDriver() await this.initDriver()
@@ -509,9 +517,15 @@ class Site {
await page.setRequestInterception(true) await page.setRequestInterception(true)
await page.setUserAgent(this.options.userAgent)
page.on('dialog', (dialog) => dialog.dismiss()) page.on('dialog', (dialog) => dialog.dismiss())
page.on('error', (error) => this.error(error)) page.on('error', (error) => {
error.message += ` (${url})`
this.error(error)
})
let responseReceived = false let responseReceived = false
@@ -523,6 +537,8 @@ class Site {
try { try {
;({ hostname } = new URL(request.url())) ;({ hostname } = new URL(request.url()))
} catch (error) { } catch (error) {
request.abort('blockedbyclient')
return return
} }
@@ -563,11 +579,17 @@ class Site {
request.continue({ headers }) request.continue({ headers })
} }
} catch (error) { } catch (error) {
error.message += ` (${url})`
this.error(error) this.error(error)
} }
}) })
page.on('response', async (response) => { page.on('response', async (response) => {
if (this.destroyed || !page || page.__closed || page.isClosed()) {
return
}
try { try {
if ( if (
response.status() < 300 && response.status() < 300 &&
@@ -578,7 +600,15 @@ class Site {
await this.onDetect(response.url(), analyze({ scripts })) await this.onDetect(response.url(), analyze({ scripts }))
} }
} catch (error) {
if (error.constructor.name !== 'ProtocolError') {
error.message += ` (${url})`
this.error(error)
}
}
try {
if (response.url() === url.href) { if (response.url() === url.href) {
this.analyzedUrls[url.href] = { this.analyzedUrls[url.href] = {
status: response.status(), status: response.status(),
@@ -625,26 +655,21 @@ class Site {
await this.emit('response', { page, response, headers, certIssuer }) await this.emit('response', { page, response, headers, certIssuer })
} }
} catch (error) { } catch (error) {
error.message += ` (${url})`
this.error(error) this.error(error)
} }
}) })
await page.setUserAgent(this.options.userAgent)
try {
try { try {
await this.promiseTimeout(page.goto(url.href)) await page.goto(url.href)
} catch (error) {
if (
error.constructor.name !== 'TimeoutError' &&
error.code !== 'PROMISE_TIMEOUT_ERROR'
) {
throw error
}
}
if (page.url() === 'about:blank') { if (page.url() === 'about:blank') {
throw new Error('The website failed to load') const error = new Error(`The page failed to load (${url})`)
error.code = 'WAPPALYZER_PAGE_EMPTY'
throw error
} }
if (!this.options.noScripts) { if (!this.options.noScripts) {
@@ -665,6 +690,8 @@ class Site {
{} {}
) )
} catch (error) { } catch (error) {
error.message += ` (${url})`
this.error(error) this.error(error)
} }
@@ -906,18 +933,26 @@ class Site {
...this.cache[url.href], ...this.cache[url.href],
}) })
page.__closed = true
try {
await page.close() await page.close()
this.log(`Page closed (${url})`) this.log(`Page closed (${url})`)
} catch (error) {
// Continue
}
return reducedLinks return reducedLinks
} catch (error) { } catch (error) {
page.__closed = true
try { try {
await page.close() await page.close()
this.log(`Page closed (${url})`) this.log(`Page closed (${url})`)
} catch (error) { } catch (error) {
this.log(error) // Continue
} }
let hostname = url let hostname = url
@@ -928,43 +963,35 @@ class Site {
// Continue // Continue
} }
if (
error.constructor.name === 'TimeoutError' ||
error.code === 'PROMISE_TIMEOUT_ERROR'
) {
const newError = new Error(
`The website took too long to respond: ${
error.message || error
} at ${hostname}`
)
newError.code = 'WAPPALYZER_TIMEOUT_ERROR'
throw newError
}
if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) { if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) {
const newError = new Error( const newError = new Error(`Hostname could not be resolved (${url})`)
`Hostname could not be resolved at ${hostname}`
)
newError.code = 'WAPPALYZER_DNS_ERROR' newError.code = 'WAPPALYZER_DNS_ERROR'
throw newError throw newError
} }
if (
error.constructor.name === 'TimeoutError' ||
error.code === 'PROMISE_TIMEOUT_ERROR'
) {
error.code = 'WAPPALYZER_TIMEOUT_ERROR'
}
error.message += ` (${url})`
throw error throw error
} }
} }
async analyze(url = this.originalUrl, index = 1, depth = 1) { async analyze(url = this.originalUrl, index = 1, depth = 1) {
try {
if (this.options.recursive) { if (this.options.recursive) {
await sleep(this.options.delay * index) await sleep(this.options.delay * index)
} }
await Promise.all([ await Promise.allSettled([
(async () => { (async () => {
try {
const links = ((await this.goto(url)) || []).filter( const links = ((await this.goto(url)) || []).filter(
({ href }) => !this.analyzedUrls[href] ({ href }) => !this.analyzedUrls[href]
) )
@@ -983,6 +1010,16 @@ class Site {
depth + 1 depth + 1
) )
} }
} catch (error) {
this.analyzedUrls[url.href] = {
status: this.analyzedUrls[url.href]?.status || 0,
error: error.message || error.toString(),
}
error.message += ` (${url})`
this.error(error)
}
})(), })(),
(async () => { (async () => {
if (this.options.probe && !this.probed) { if (this.options.probe && !this.probed) {
@@ -992,14 +1029,6 @@ class Site {
} }
})(), })(),
]) ])
} catch (error) {
this.analyzedUrls[url.href] = {
status: this.analyzedUrls[url.href]?.status || 0,
error: error.message || error.toString(),
}
this.error(error)
}
const patterns = this.options.extended const patterns = this.options.extended
? this.detections.reduce( ? this.detections.reduce(
@@ -1076,6 +1105,8 @@ class Site {
return this.promiseTimeout( return this.promiseTimeout(
func(hostname).catch((error) => { func(hostname).catch((error) => {
if (error.code !== 'ENODATA') { if (error.code !== 'ENODATA') {
error.message += ` (${url})`
this.error(error) this.error(error)
} }
@@ -1089,7 +1120,7 @@ class Site {
const domain = url.hostname.replace(/^www\./, '') const domain = url.hostname.replace(/^www\./, '')
await Promise.all([ await Promise.allSettled([
// Static files // Static files
...Object.keys(files).map(async (file, index) => { ...Object.keys(files).map(async (file, index) => {
const path = files[file] const path = files[file]
@@ -1099,7 +1130,7 @@ class Site {
const body = await get(new URL(path, url.href), { const body = await get(new URL(path, url.href), {
userAgent: this.options.userAgent, userAgent: this.options.userAgent,
timeout: Math.min(this.options.maxWait, 3000), timeout: Math.min(this.options.maxWait, 1000),
}) })
this.log(`Probe ok (${path})`) this.log(`Probe ok (${path})`)
@@ -1156,7 +1187,7 @@ class Site {
const batched = links.splice(0, this.options.batchSize) const batched = links.splice(0, this.options.batchSize)
await Promise.all( await Promise.allSettled(
batched.map((link, index) => this.analyze(link, index, depth)) batched.map((link, index) => this.analyze(link, index, depth))
) )
@@ -1189,7 +1220,7 @@ class Site {
), ),
] ]
await Promise.all( await Promise.allSettled(
requires.map(async ({ name, categoryId, technologies }) => { requires.map(async ({ name, categoryId, technologies }) => {
const id = categoryId const id = categoryId
? `category:${categoryId}` ? `category:${categoryId}`
@@ -1242,9 +1273,11 @@ class Site {
} }
async destroy() { async destroy() {
await Promise.all( await Promise.allSettled(
this.pages.map(async (page) => { this.pages.map(async (page) => {
if (page) { if (page) {
page.__closed = true
try { try {
await page.close() await page.close()
} catch (error) { } catch (error) {
@@ -1254,6 +1287,8 @@ class Site {
}) })
) )
this.destroyed = true
this.log('Site closed') this.log('Site closed')
} }
} }

@@ -13,7 +13,7 @@
"software" "software"
], ],
"homepage": "https://www.wappalyzer.com/", "homepage": "https://www.wappalyzer.com/",
"version": "6.10.27", "version": "6.10.35",
"author": "Wappalyzer", "author": "Wappalyzer",
"license": "MIT", "license": "MIT",
"repository": { "repository": {
@@ -38,7 +38,7 @@
"wappalyzer": "./cli.js" "wappalyzer": "./cli.js"
}, },
"dependencies": { "dependencies": {
"puppeteer": "^13.5.2" "puppeteer": "~14.1.0"
}, },
"engines": { "engines": {
"node": ">=14" "node": ">=14"

@@ -4,7 +4,7 @@
"author": "Wappalyzer", "author": "Wappalyzer",
"homepage_url": "https://www.wappalyzer.com/", "homepage_url": "https://www.wappalyzer.com/",
"description": "Identify web technologies", "description": "Identify web technologies",
"version": "6.10.27", "version": "6.10.35",
"default_locale": "en", "default_locale": "en",
"manifest_version": 2, "manifest_version": 2,
"icons": { "icons": {

@@ -13,7 +13,7 @@
"software" "software"
], ],
"homepage": "https://www.wappalyzer.com/", "homepage": "https://www.wappalyzer.com/",
"version": "6.10.27", "version": "6.10.35",
"author": "Wappalyzer", "author": "Wappalyzer",
"license": "MIT", "license": "MIT",
"repository": { "repository": {