From 3c6dcf6afbda987b5bbaa0ab8b4107be4bf4363e Mon Sep 17 00:00:00 2001
From: Elbert Alias <77259+AliasIO@users.noreply.github.com>
Date: Thu, 27 Feb 2020 15:38:10 +1100
Subject: [PATCH] Fix NPM/Puppeteer error handling

---
 src/drivers/npm/browsers/puppeteer.js  | 210 +++++++++++++------------
 src/drivers/npm/package.json           |   2 +-
 src/drivers/webextension/manifest.json |   2 +-
 3 files changed, 115 insertions(+), 99 deletions(-)

diff --git a/src/drivers/npm/browsers/puppeteer.js b/src/drivers/npm/browsers/puppeteer.js
index 5b422e833..efc70061d 100644
--- a/src/drivers/npm/browsers/puppeteer.js
+++ b/src/drivers/npm/browsers/puppeteer.js
@@ -57,117 +57,133 @@ class PuppeteerBrowser extends Browser {
     super(options);
   }
 
-  visit(url) {
-    return new Promise(async (resolve, reject) => {
-      let done = false;
-      let browser;
-
-      try {
-        browser = await puppeteer.launch(chromium ? {
-          args: [...chromium.args, '--ignore-certificate-errors'],
-          defaultViewport: chromium.defaultViewport,
-          executablePath: await chromium.executablePath,
-          headless: chromium.headless,
-        } : {
-          args: ['--no-sandbox', '--headless', '--disable-gpu', '--ignore-certificate-errors', '--disable-dev-shm-usage'],
-          executablePath: CHROME_BIN,
-        });
+  /**
+   * Load a URL in a browser launched through Puppeteer and populate
+   * statusCode, headers, contentType, links, scripts, js, cookies and html
+   * on this instance.
+   *
+   * @param {string} url - Address passed to page.goto().
+   * @returns {Promise<void>} Settles once the browser has been closed.
+   * @throws {Error} When launching, navigating or reading the page fails,
+   *   or when the browser disconnects before the visit completes.
+   */
+  async visit(url) {
+    let done = false;
+    let browser;
 
-        browser.on('disconnected', () => {
-          if (!done) {
-            reject(new Error('browser: disconnected'));
-          }
-        });
+    try {
+      // Wrapped in a promise so browser/page event handlers can reject the visit.
+      await new Promise(async (resolve, _reject) => {
+        try {
+          browser = await puppeteer.launch(chromium ? {
+            args: [...chromium.args, '--ignore-certificate-errors'],
+            defaultViewport: chromium.defaultViewport,
+            executablePath: await chromium.executablePath,
+            headless: chromium.headless,
+          } : {
+            args: ['--no-sandbox', '--headless', '--disable-gpu', '--ignore-certificate-errors', '--disable-dev-shm-usage'],
+            executablePath: CHROME_BIN,
+          });
+
+          browser.on('disconnected', () => {
+            if (!done) {
+              _reject(new Error('browser: disconnected'));
+            }
+          });
 
-        const page = await browser.newPage();
+          const page = await browser.newPage();
 
-        page.setDefaultTimeout(this.options.maxWait * 2);
+          page.setDefaultTimeout(this.options.maxWait * 2);
 
-        page.on('error', error => reject(new Error(`page error: ${error.message || error}`)));
+          page.on('error', error => _reject(new Error(`page error: ${error.message || error}`)));
 
-        page.on('response', (response) => {
-          try {
-            if (response.status() === 301 || response.status() === 302) {
-              return;
-            }
+          page.on('response', (response) => {
+            try {
+              if (response.status() === 301 || response.status() === 302) {
+                return;
+              }
 
-            if (!this.statusCode) {
-              this.statusCode = response.status();
+              if (!this.statusCode) {
+                this.statusCode = response.status();
 
-              this.headers = {};
+                this.headers = {};
 
-              const headers = response.headers();
+                const headers = response.headers();
 
-              Object.keys(headers).forEach((key) => {
-                this.headers[key] = Array.isArray(headers[key]) ? headers[key] : [headers[key]];
-              });
+                Object.keys(headers).forEach((key) => {
+                  this.headers[key] = Array.isArray(headers[key]) ? headers[key] : [headers[key]];
+                });
 
-              this.contentType = headers['content-type'] || null;
+                this.contentType = headers['content-type'] || null;
+              }
+            } catch (error) {
+              _reject(new Error(`page error: ${error.message || error}`));
             }
-          } catch (error) {
-            reject(new Error(`page error: ${error.message || error}`));
-          }
-        });
+          });
+
+          page.on('console', ({ _type, _text, _location }) => this.log(`${_text} (${_location.url}: ${_location.lineNumber})`, _type));
+
+          await page.setUserAgent(this.options.userAgent);
+
+          await Promise.race([
+            page.goto(url, { waitUntil: 'domcontentloaded' }),
+            new Promise(_resolve => setTimeout(() => {
+              this.log('Timeout', 'error');
+
+              _resolve();
+            }, this.options.maxWait)),
+          ]);
+
+          // eslint-disable-next-line no-undef
+          const links = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('a')).map(({
+            hash, hostname, href, pathname, protocol, rel,
+          }) => ({
+            hash,
+            hostname,
+            href,
+            pathname,
+            protocol,
+            rel,
+          })));
 
-        page.on('console', ({ _type, _text, _location }) => this.log(`${_text} (${_location.url}: ${_location.lineNumber})`, _type));
-
-        await page.setUserAgent(this.options.userAgent);
-
-        await Promise.race([
-          page.goto(url, { waitUntil: 'domcontentloaded' }),
-          new Promise(_resolve => setTimeout(() => {
-            this.log('Timeout', 'error');
-
-            _resolve();
-          }, this.options.maxWait)),
-        ]);
-
-        // eslint-disable-next-line no-undef
-        const links = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('a')).map(({
-          hash, hostname, href, pathname, protocol, rel,
-        }) => ({
-          hash,
-          hostname,
-          href,
-          pathname,
-          protocol,
-          rel,
-        })));
-
-        this.links = await links.jsonValue();
-
-        // eslint-disable-next-line no-undef
-        const scripts = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('script')).map(({
-          src,
-        }) => src));
-
-        this.scripts = (await scripts.jsonValue()).filter(script => script);
-
-        this.js = await page.evaluate(getJs);
-
-        this.cookies = (await page.cookies()).map(({
-          name, value, domain, path,
-        }) => ({
-          name, value, domain, path,
-        }));
-
-        this.html = await page.content();
-
-        resolve();
-      } catch (error) {
-        reject(new Error(`visit error: ${error.message || error}`));
-      } finally {
-        done = true;
-
-        if (browser) {
-          try {
-            await browser.close();
-          } catch (error) {
-            this.log(error.message || error.toString(), 'error');
-          }
+          this.links = await links.jsonValue();
+
+          // eslint-disable-next-line no-undef
+          const scripts = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('script')).map(({
+            src,
+          }) => src));
+
+          this.scripts = (await scripts.jsonValue()).filter(script => script);
+
+          this.js = await page.evaluate(getJs);
+
+          this.cookies = (await page.cookies()).map(({
+            name, value, domain, path,
+          }) => ({
+            name, value, domain, path,
+          }));
+
+          this.html = await page.content();
+
+          resolve();
+        } catch (error) {
+          _reject(new Error(`visit error: ${error.message || error}`));
+        }
+      });
+    } catch (error) {
+      throw new Error(error.message || error.toString());
+    } finally {
+      // Always release the browser, on success as well as on failure.
+      done = true;
+
+      if (browser) {
+        try {
+          await browser.close();
+        } catch (error) {
+          this.log(error.message || error.toString(), 'error');
         }
       }
-    });
+    }
   }
 }
 
diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json
index 21613c8ea..64a29d62c 100644
--- a/src/drivers/npm/package.json
+++ b/src/drivers/npm/package.json
@@ -2,7 +2,7 @@
   "name": "wappalyzer",
   "description": "Uncovers the technologies used on websites",
   "homepage": "https://github.com/AliasIO/Wappalyzer",
-  "version": "5.9.21",
+  "version": "5.9.22",
   "author": "Elbert Alias",
   "license": "GPL-3.0",
   "repository": {
diff --git a/src/drivers/webextension/manifest.json b/src/drivers/webextension/manifest.json
index bc7fe4856..be890ceca 100644
--- a/src/drivers/webextension/manifest.json
+++ b/src/drivers/webextension/manifest.json
@@ -4,7 +4,7 @@
   "author": "Elbert Alias",
   "homepage_url": "https://www.wappalyzer.com",
   "description": "Identify web technologies",
-  "version": "5.9.20",
+  "version": "5.9.22",
   "default_locale": "en",
   "manifest_version": 2,
   "icons": {