From d562506095b0c859507e7ea6f46cc9da96fb88e5 Mon Sep 17 00:00:00 2001
From: Elbert Alias <77259+AliasIO@users.noreply.github.com>
Date: Wed, 27 Oct 2021 12:36:01 +1100
Subject: [PATCH] Implement categoryRequires in WebExtension

Resolve the technologies to analyse through a shared
getRequiredTechnologies() helper that accepts either a required
technology name (Wappalyzer.requires) or a required category id
(Wappalyzer.categoryRequires), and thread the category id from the
content script through analyzeRequires, onGetTechnologies, analyzeJs,
analyzeDom and onContentLoad.

analyzeJs and analyzeDom keep falling back to Wappalyzer.technologies
when neither a technology nor a category requirement is given, exactly
as they did before this change. onContentLoad keeps passing undefined
to analyze() in that case, also as before.

The content script additionally caches the page's inner text.
---
NOTE(review): this patch was re-flowed from a copy whose newlines had been
collapsed. Indentation and blank context lines are reconstructed and should
be checked against the tree before applying. The commit id and the blob ids
on the index lines are those of the original commit, which did not contain
the Wappalyzer.technologies fallback or the added comments in
src/drivers/webextension/js/driver.js.

 src/drivers/npm/driver.js              |  2 +-
 src/drivers/webextension/js/content.js | 23 ++++---
 src/drivers/webextension/js/driver.js  | 60 +++++++++++------
 3 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js
index a8441831b..2284ff9b5 100644
--- a/src/drivers/npm/driver.js
+++ b/src/drivers/npm/driver.js
@@ -1,4 +1,3 @@
-/* eslint-disable unicorn/prefer-text-content */
 const { URL } = require('url')
 const fs = require('fs')
 const dns = require('dns').promises
@@ -733,6 +732,7 @@ class Site {
           (
             await this.promiseTimeout(
               page.evaluateHandle(() =>
+                // eslint-disable-next-line unicorn/prefer-text-content
                 document.body.innerText.replace(/\s+/g, ' ')
               ),
               { jsonValue: () => '' },
diff --git a/src/drivers/webextension/js/content.js b/src/drivers/webextension/js/content.js
index 50b19a0bc..f2876972f 100644
--- a/src/drivers/webextension/js/content.js
+++ b/src/drivers/webextension/js/content.js
@@ -176,6 +176,10 @@ const Content = {
         {}
       )
 
+      // Text
+      // eslint-disable-next-line unicorn/prefer-text-content
+      const text = document.body.innerText.replace(/\s+/g, ' ')
+
       // CSS rules
       let css = []
 
@@ -269,7 +273,7 @@ const Content = {
         }
       }
 
-      Content.cache = { html, css, scriptSrc, scripts, meta, cookies }
+      Content.cache = { html, text, css, scriptSrc, scripts, meta, cookies }
 
       await Content.driver('onContentLoad', [
         url,
@@ -351,17 +355,20 @@ const Content = {
 
   async analyzeRequires(url, requires) {
     await Promise.all(
-      requires.map(async ({ name, technologies }) => {
-        if (!Content.analyzedRequires.includes(name)) {
-          Content.analyzedRequires.push(name)
+      requires.map(async ({ name, categoryId, technologies }) => {
+        const id = categoryId ? `category:${categoryId}` : `technology:${name}`
+
+        if (!Content.analyzedRequires.includes(id)) {
+          Content.analyzedRequires.push(id)
 
           await Promise.all([
-            Content.onGetTechnologies(technologies, name),
+            Content.onGetTechnologies(technologies, name, categoryId),
             Content.driver('onContentLoad', [
               url,
               Content.cache,
               Content.language,
               name,
+              categoryId,
             ]),
           ])
         }
@@ -373,15 +380,15 @@ const Content = {
    * Callback for getTechnologies
    * @param {Array} technologies
    */
-  async onGetTechnologies(technologies = [], requires) {
+  async onGetTechnologies(technologies = [], requires, categoryRequires) {
     const url = location.href
 
     const js = await getJs(technologies)
     const dom = await getDom(technologies)
 
     await Promise.all([
-      Content.driver('analyzeJs', [url, js, requires]),
-      Content.driver('analyzeDom', [url, dom, requires]),
+      Content.driver('analyzeJs', [url, js, requires, categoryRequires]),
+      Content.driver('analyzeDom', [url, dom, requires, categoryRequires]),
     ])
   },
 }
diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js
index 2039cd73c..ec2a7d1a8 100644
--- a/src/drivers/webextension/js/driver.js
+++ b/src/drivers/webextension/js/driver.js
@@ -21,6 +21,22 @@ const xhrDebounce = []
 
 const scriptsPending = []
 
+/**
+ * Get the technologies gated behind a required technology or category
+ * @param name Name of an entry in Wappalyzer.requires, checked first
+ * @param categoryId Category id of an entry in Wappalyzer.categoryRequires
+ * @returns The entry's technologies, or undefined when neither is given
+ */
+function getRequiredTechnologies(name, categoryId) {
+  return name
+    ? Wappalyzer.requires.find(({ name: _name }) => _name === name).technologies
+    : categoryId
+    ? Wappalyzer.categoryRequires.find(
+        ({ categoryId: _categoryId }) => _categoryId === categoryId
+      ).technologies
+    : undefined
+}
+
 const Driver = {
   lastPing: Date.now(),
 
@@ -198,10 +214,11 @@ const Driver = {
    * @param {String} url
    * @param {Array} js
    */
-  async analyzeJs(url, js, requires) {
-    const technologies = requires
-      ? Wappalyzer.requires.find(({ name }) => name === requires).technologies
-      : Wappalyzer.technologies
+  async analyzeJs(url, js, requires, categoryRequires) {
+    // Without a requirement, analyse against every technology as before
+    const technologies =
+      getRequiredTechnologies(requires, categoryRequires) ||
+      Wappalyzer.technologies
 
     return Driver.onDetect(
       url,
@@ -227,10 +244,11 @@ const Driver = {
    * @param {String} url
    * @param {Array} dom
    */
-  async analyzeDom(url, dom, requires) {
-    const technologies = requires
-      ? Wappalyzer.requires[requires].technologies
-      : Wappalyzer.technologies
+  async analyzeDom(url, dom, requires, categoryRequires) {
+    // Without a requirement, analyse against every technology as before
+    const technologies =
+      getRequiredTechnologies(requires, categoryRequires) ||
+      Wappalyzer.technologies
 
     return Driver.onDetect(
       url,
@@ -468,7 +486,7 @@ const Driver = {
    * @param {Object} items
    * @param {String} language
    */
-  async onContentLoad(url, items, language, requires) {
+  async onContentLoad(url, items, language, requires, categoryRequires) {
     try {
       items.cookies = items.cookies || {}
 
@@ -481,12 +499,11 @@ const Driver = {
         ({ name, value }) => (items.cookies[name.toLowerCase()] = [value])
       )
 
+      const technologies = getRequiredTechnologies(requires, categoryRequires)
+
       await Driver.onDetect(
         url,
-        await analyze(
-          { url, ...items },
-          requires ? Wappalyzer.requires[requires].technologies : undefined
-        ),
+        await analyze({ url, ...items }, technologies),
         language,
         true
       )
@@ -533,10 +550,6 @@ const Driver = {
       return
     }
 
-    Driver.log([
-      ...new Set(detections.map(({ technology }) => technology.name)),
-    ])
-
     url = url.split('#')[0]
 
     const { hostname } = new URL(url)
@@ -632,9 +645,16 @@ const Driver = {
       return detection
     })
 
-    const requires = Wappalyzer.requires.filter(({ name, technologies }) =>
-      resolved.some(({ name: _name }) => _name === name)
-    )
+    const requires = [
+      ...Wappalyzer.requires.filter(({ name }) =>
+        resolved.some(({ name: _name }) => _name === name)
+      ),
+      ...Wappalyzer.categoryRequires.filter(({ categoryId }) =>
+        resolved.some(({ categories }) =>
+          categories.some(({ id }) => id === categoryId)
+        )
+      ),
+    ]
 
     try {
       await Driver.content(url, 'analyzeRequires', [url, requires])