From a8f64ee5661b521656ed9175d6c4c6687dce7e6a Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Mon, 11 Jul 2022 11:28:44 +1000 Subject: [PATCH] Fix performance issue in WebExtension driver --- src/drivers/webextension/js/driver.js | 39 ++++++++++++++++----------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index 405f2de40..f343a0ac3 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -21,8 +21,6 @@ const xhrDebounce = [] let xhrAnalyzed = {} -const scriptsPending = [] - function getRequiredTechnologies(name, categoryId) { return name ? Wappalyzer.requires.find(({ name: _name }) => _name === name).technologies @@ -421,19 +419,29 @@ const Driver = { return } - if (scriptsPending.includes(request.url)) { - scriptsPending.splice(scriptsPending.indexOf(request.url), 1) - } else if (request.statusCode === 200) { - scriptsPending.push(request.url) + const { hostname } = new URL(request.documentUrl) - const response = await fetch(request.url) + if (!Driver.cache.hostnames[hostname]) { + Driver.cache.hostnames[hostname] = {} + } - const scripts = await response.text() + if (!Driver.cache.hostnames[hostname].analyzedScripts) { + Driver.cache.hostnames[hostname].analyzedScripts = [] + } - Driver.onDetect(request.documentUrl, analyze({ scripts })).catch( - Driver.error - ) + if (Driver.cache.hostnames[hostname].analyzedScripts.length > 50) { + return } + + Driver.cache.hostnames[hostname].analyzedScripts.push(request.url) + + const response = await fetch(request.url) + + const scripts = (await response.text()).slice(0, 500000) + + Driver.onDetect(request.documentUrl, analyze({ scripts })).catch( + Driver.error + ) }, /** @@ -554,16 +562,15 @@ const Driver = { const { hostname } = new URL(url) // Cache detections - const cache = (Driver.cache.hostnames[hostname] = Driver.cache.hostnames[ - hostname - ] || { + const cache = (Driver.cache.hostnames[hostname] = { detections: [], hits: incrementHits ? 0 : 1, https: url.startsWith('https://'), + analyzedScripts: [], + ...(Driver.cache.hostnames[hostname] || []), + dateTime: Date.now(), }) - cache.dateTime = Date.now() - // Remove duplicates cache.detections = cache.detections .concat(detections)