From bbc7cebf4ee3fd4570703a68e1e4db7915f68e24 Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Wed, 13 Oct 2021 09:07:53 +1100 Subject: [PATCH] Fix hang issue in NPM driver --- src/drivers/npm/driver.js | 247 ++++++++++++++++++++------------------ 1 file changed, 131 insertions(+), 116 deletions(-) diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 0a4f332ee..1cd10c7db 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -459,7 +459,7 @@ class Site { promiseTimeout( promise, fallback, - errorMessage = 'Operation took too long to respond', + errorMessage = 'Operation took too long to complete', maxWait = this.options.maxWait ) { let timeout = null @@ -477,7 +477,13 @@ class Site { error.code = 'PROMISE_TIMEOUT_ERROR' - fallback !== undefined ? resolve(fallback) : reject(error) + if (fallback !== undefined) { + this.error(error) + + resolve(fallback) + } else { + reject(error) + } }, maxWait) }), promise.then((value) => { @@ -632,119 +638,6 @@ class Site { // page.on('console', (message) => this.log(message.text())) - // Links - const links = !this.options.recursive - ? [] - : await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle(() => - Array.from(document.getElementsByTagName('a')).map( - ({ hash, hostname, href, pathname, protocol, rel }) => ({ - hash, - hostname, - href, - pathname, - protocol, - rel, - }) - ) - ), - { jsonValue: () => [] }, - 'Timeout (links)' - ) - ).jsonValue(), - [], - 'Timeout (links)' - ) - - // CSS - const css = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle((maxRows) => { - const css = [] - - try { - if (!document.styleSheets.length) { - return '' - } - - for (const sheet of Array.from(document.styleSheets)) { - for (const rules of Array.from(sheet.cssRules)) { - css.push(rules.cssText) - - if (css.length >= maxRows) { - break - } - } - } - } catch (error) { - return '' - } - - return css.join('\n') - }, this.options.htmlMaxRows), - { jsonValue: () => '' }, - 'Timeout (css)' - ) - ).jsonValue(), - '', - 'Timeout (css)' - ) - - // Script tags - const scripts = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle(() => - Array.from(document.getElementsByTagName('script')) - .map(({ src }) => src) - .filter((src) => src) - ), - { jsonValue: () => [] }, - 'Timeout (scripts)' - ) - ).jsonValue(), - [], - 'Timeout (scripts)' - ) - - // Meta tags - const meta = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle(() => - Array.from(document.querySelectorAll('meta')).reduce( - (metas, meta) => { - const key = - meta.getAttribute('name') || meta.getAttribute('property') - - if (key) { - metas[key.toLowerCase()] = [meta.getAttribute('content')] - } - - return metas - }, - {} - ) - ), - { jsonValue: () => [] }, - 'Timeout (meta)' - ) - ).jsonValue(), - [], - 'Timeout (meta)' - ) - - // JavaScript - const js = this.options.noScripts - ? [] - : await this.promiseTimeout(getJs(page), [], 'Timeout (js)') - - // DOM - const dom = await this.promiseTimeout(getDom(page), [], 'Timeout (dom)') - // Cookies const cookies = (await page.cookies()).reduce( (cookies, { name, value }) => ({ @@ -755,7 +648,7 @@ class Site { ) // HTML - let html = await page.content() + let html = await this.promiseTimeout(page.content(), '', 'Timeout (html)') if (this.options.htmlMaxCols && this.options.htmlMaxRows) { const batches = [] @@ -778,6 +671,128 @@ class Site { html = batches.join('\n') } + let links = [] + let css = '' + let scripts = [] + let meta = [] + let js = [] + let dom = [] + + if (html) { + // Links + links = !this.options.recursive + ? [] + : await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle(() => + Array.from(document.getElementsByTagName('a')).map( + ({ hash, hostname, href, pathname, protocol, rel }) => ({ + hash, + hostname, + href, + pathname, + protocol, + rel, + }) + ) + ), + { jsonValue: () => [] }, + 'Timeout (links)' + ) + ).jsonValue(), + [], + 'Timeout (links)' + ) + + // CSS + css = await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle((maxRows) => { + const css = [] + + try { + if (!document.styleSheets.length) { + return '' + } + + for (const sheet of Array.from(document.styleSheets)) { + for (const rules of Array.from(sheet.cssRules)) { + css.push(rules.cssText) + + if (css.length >= maxRows) { + break + } + } + } + } catch (error) { + return '' + } + + return css.join('\n') + }, this.options.htmlMaxRows), + { jsonValue: () => '' }, + 'Timeout (css)' + ) + ).jsonValue(), + '', + 'Timeout (css)' + ) + + // Script tags + scripts = await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle(() => + Array.from(document.getElementsByTagName('script')) + .map(({ src }) => src) + .filter((src) => src) + ), + { jsonValue: () => [] }, + 'Timeout (scripts)' + ) + ).jsonValue(), + [], + 'Timeout (scripts)' + ) + + // Meta tags + meta = await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle(() => + Array.from(document.querySelectorAll('meta')).reduce( + (metas, meta) => { + const key = + meta.getAttribute('name') || meta.getAttribute('property') + + if (key) { + metas[key.toLowerCase()] = [meta.getAttribute('content')] + } + + return metas + }, + {} + ) + ), + { jsonValue: () => [] }, + 'Timeout (meta)' + ) + ).jsonValue(), + [], + 'Timeout (meta)' + ) + + // JavaScript + js = this.options.noScripts + ? [] + : await this.promiseTimeout(getJs(page), [], 'Timeout (js)') + + // DOM + dom = await this.promiseTimeout(getDom(page), [], 'Timeout (dom)') + } + this.cache[url.href] = { page, html,