diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index b01706513..b09462124 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -1,4 +1,4 @@ -const os = require('os') +// const os = require('os') const fs = require('fs') const dns = require('dns').promises const path = require('path') @@ -16,18 +16,12 @@ const chromiumArgs = [ '--no-sandbox', '--no-zygote', '--disable-gpu', - '--disable-software-rasterizer', - '--disable-features=AudioServiceOutOfProcess', '--ignore-certificate-errors', '--allow-running-insecure-content', '--disable-web-security', `--user-data-dir=${CHROMIUM_DATA_DIR || '/tmp/chromium'}`, ] -if (os.arch() === 'arm64') { - chromiumArgs.push('--single-process') -} - const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ const categories = JSON.parse( @@ -429,7 +423,7 @@ class Site { emit(event, params) { if (this.listeners[event]) { - return Promise.all( + return Promise.allSettled( this.listeners[event].map((listener) => listener(params)) ) } @@ -483,7 +477,7 @@ class Site { return [] } - this.log(`Navigate to ${url}`, 'page') + this.log(`Navigate to ${url}`) this.analyzedUrls[url.href] = { status: 0, @@ -501,7 +495,13 @@ class Site { try { page = await this.browser.newPage() + + if (!page || page.isClosed()) { + throw new Error('Page did not open') + } } catch (error) { + error.message += ` (${url})` + this.error(error) await this.initDriver() @@ -517,9 +517,15 @@ class Site { await page.setRequestInterception(true) + await page.setUserAgent(this.options.userAgent) + page.on('dialog', (dialog) => dialog.dismiss()) - page.on('error', (error) => this.error(error)) + page.on('error', (error) => { + error.message += ` (${url})` + + this.error(error) + }) let responseReceived = false @@ -531,6 +537,8 @@ class Site { try { ;({ hostname } = new URL(request.url())) } catch (error) { + request.abort('blockedbyclient') + return } @@ -571,12 +579,14 @@ class Site { request.continue({ headers }) } } catch (error) { + error.message += ` (${url})` + this.error(error) } }) page.on('response', async (response) => { - if (this.destroyed || !page || page.isClosed()) { + if (this.destroyed || !page || page.__closed || page.isClosed()) { return } @@ -591,7 +601,11 @@ class Site { await this.onDetect(response.url(), analyze({ scripts })) } } catch (error) { - this.error(error) + if (error.constructor.name !== 'ProtocolError') { + error.message += ` (${url})` + + this.error(error) + } } try { @@ -641,26 +655,21 @@ class Site { await this.emit('response', { page, response, headers, certIssuer }) } } catch (error) { + error.message += ` (${url})` + this.error(error) } }) - await page.setUserAgent(this.options.userAgent) - try { - try { - await this.promiseTimeout(page.goto(url.href)) - } catch (error) { - if ( - error.constructor.name !== 'TimeoutError' && - error.code !== 'PROMISE_TIMEOUT_ERROR' - ) { - throw error - } - } + await page.goto(url.href) if (page.url() === 'about:blank') { - throw new Error(`The page failed to load (${url.href})`) + const error = new Error(`The page failed to load (${url})`) + + error.code = 'WAPPALYZER_PAGE_EMPTY' + + throw error } if (!this.options.noScripts) { @@ -681,6 +690,8 @@ class Site { {} ) } catch (error) { + error.message += ` (${url})` + this.error(error) } @@ -922,16 +933,20 @@ class Site { ...this.cache[url.href], }) + page.__closed = true + try { await page.close() + + this.log(`Page closed (${url})`) } catch (error) { // Continue } - this.log(`Page closed (${url})`) - return reducedLinks } catch (error) { + page.__closed = true + try { await page.close() @@ -948,43 +963,35 @@ class Site { // Continue } - if ( - error.constructor.name === 'TimeoutError' || - error.code === 'PROMISE_TIMEOUT_ERROR' - ) { - const newError = new Error( - `The website took too long to respond: ${ - error.message || error - } at ${hostname}` - ) - - newError.code = 'WAPPALYZER_TIMEOUT_ERROR' - - throw newError - } - if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) { - const newError = new Error( - `Hostname could not be resolved at ${hostname}` - ) + const newError = new Error(`Hostname could not be resolved (${url})`) newError.code = 'WAPPALYZER_DNS_ERROR' throw newError } + if ( + error.constructor.name === 'TimeoutError' || + error.code === 'PROMISE_TIMEOUT_ERROR' + ) { + error.code = 'WAPPALYZER_TIMEOUT_ERROR' + } + + error.message += ` (${url})` + throw error } } async analyze(url = this.originalUrl, index = 1, depth = 1) { - try { - if (this.options.recursive) { - await sleep(this.options.delay * index) - } + if (this.options.recursive) { + await sleep(this.options.delay * index) + } - await Promise.all([ - (async () => { + await Promise.allSettled([ + (async () => { + try { const links = ((await this.goto(url)) || []).filter( ({ href }) => !this.analyzedUrls[href] ) @@ -1003,23 +1010,25 @@ class Site { depth + 1 ) } - })(), - (async () => { - if (this.options.probe && !this.probed) { - this.probed = true - - await this.probe(url) + } catch (error) { + this.analyzedUrls[url.href] = { + status: this.analyzedUrls[url.href]?.status || 0, + error: error.message || error.toString(), } - })(), - ]) - } catch (error) { - this.analyzedUrls[url.href] = { - status: this.analyzedUrls[url.href]?.status || 0, - error: error.message || error.toString(), - } - this.error(error) - } + error.message += ` (${url})` + + this.error(error) + } + })(), + (async () => { + if (this.options.probe && !this.probed) { + this.probed = true + + await this.probe(url) + } + })(), + ]) const patterns = this.options.extended ? this.detections.reduce( @@ -1096,6 +1105,8 @@ class Site { return this.promiseTimeout( func(hostname).catch((error) => { if (error.code !== 'ENODATA') { + error.message += ` (${url})` + this.error(error) } @@ -1109,7 +1120,7 @@ class Site { const domain = url.hostname.replace(/^www\./, '') - await Promise.all([ + await Promise.allSettled([ // Static files ...Object.keys(files).map(async (file, index) => { const path = files[file] @@ -1119,7 +1130,7 @@ class Site { const body = await get(new URL(path, url.href), { userAgent: this.options.userAgent, - timeout: Math.min(this.options.maxWait, 3000), + timeout: Math.min(this.options.maxWait, 1000), }) this.log(`Probe ok (${path})`) @@ -1176,7 +1187,7 @@ class Site { const batched = links.splice(0, this.options.batchSize) - await Promise.all( + await Promise.allSettled( batched.map((link, index) => this.analyze(link, index, depth)) ) @@ -1209,7 +1220,7 @@ class Site { ), ] - await Promise.all( + await Promise.allSettled( requires.map(async ({ name, categoryId, technologies }) => { const id = categoryId ? `category:${categoryId}` @@ -1262,9 +1273,11 @@ class Site { } async destroy() { - await Promise.all( + await Promise.allSettled( this.pages.map(async (page) => { if (page) { + page.__closed = true + try { await page.close() } catch (error) { diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json index a2cb12dcb..be190cf96 100644 --- a/src/drivers/npm/package.json +++ b/src/drivers/npm/package.json @@ -13,7 +13,7 @@ "software" ], "homepage": "https://www.wappalyzer.com/", - "version": "6.10.27", + "version": "6.10.35", "author": "Wappalyzer", "license": "MIT", "repository": { @@ -38,7 +38,7 @@ "wappalyzer": "./cli.js" }, "dependencies": { - "puppeteer": "^13.5.2" + "puppeteer": "~14.1.0" }, "engines": { "node": ">=14" diff --git a/src/drivers/webextension/manifest.json b/src/drivers/webextension/manifest.json index 41c3095a3..5c2cb2850 100644 --- a/src/drivers/webextension/manifest.json +++ b/src/drivers/webextension/manifest.json @@ -4,7 +4,7 @@ "author": "Wappalyzer", "homepage_url": "https://www.wappalyzer.com/", "description": "Identify web technologies", - "version": "6.10.27", + "version": "6.10.35", "default_locale": "en", "manifest_version": 2, "icons": { diff --git a/src/package.json b/src/package.json index 0254441fc..82a980796 100644 --- a/src/package.json +++ b/src/package.json @@ -13,7 +13,7 @@ "software" ], "homepage": "https://www.wappalyzer.com/", - "version": "6.10.27", + "version": "6.10.35", "author": "Wappalyzer", "license": "MIT", "repository": {