From ea27dce1a374b88588ad05447cb1d7da2cc714a7 Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Thu, 1 Oct 2020 11:25:44 +1000 Subject: [PATCH] Add DOM inspection method, add Crisp Live Chat, update technology descriptions --- src/drivers/npm/driver.js | 104 ++++++++++++++++++ .../images/icons/Crisp Live Chat.svg | 3 + src/technologies.json | 31 +++++- src/wappalyzer.js | 87 +++++++++------ 4 files changed, 190 insertions(+), 35 deletions(-) create mode 100644 src/drivers/webextension/images/icons/Crisp Live Chat.svg diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index b62ebed6a..d304830d5 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -9,6 +9,7 @@ const { setTechnologies, setCategories, analyze, + analyzeOneToMany, analyzeManyToMany, resolve, } = Wappalyzer @@ -67,6 +68,35 @@ function analyzeJs(js) { ) } +function analyzeDom(dom) { + return Array.prototype.concat.apply( + [], + dom.map(({ name, selector, text, property, attribute, value }) => { + const technology = Wappalyzer.technologies.find( + ({ name: _name }) => name === _name + ) + + if (text) { + return analyzeManyToMany(technology, 'dom.text', { [selector]: [text] }) + } + + if (property) { + return analyzeManyToMany(technology, `dom.properties.${property}`, { + [selector]: [value], + }) + } + + if (attribute) { + return analyzeManyToMany(technology, `dom.attributes.${attribute}`, { + [selector]: [value], + }) + } + + return [] + }) + ) +} + function get(url) { if (['http:', 'https:'].includes(url.protocol)) { const { get } = url.protocol === 'http:' ? http : https @@ -488,6 +518,79 @@ class Site { ) ) + // DOM + const dom = await this.promiseTimeout( + page.evaluate( + (technologies) => { + return technologies.reduce((technologies, { name, dom }) => { + const toScalar = (value) => + typeof value === 'string' || typeof value === 'number' + ? value + : !!value + + Object.keys(dom).forEach((selector) => { + const el = document.querySelector(selector) + + if (!el) { + return + } + + dom[selector].forEach(({ text, properties, attributes }) => { + if (text) { + const value = el.textContent.trim() + + if (value) { + technologies.push({ + name, + selector, + text: value, + }) + } + } + + if (properties) { + Object.keys(properties).forEach((property) => { + if (Object.prototype.hasOwnProperty.call(el, property)) { + const value = el[property] + + if (typeof value !== 'undefined') { + technologies.push({ + name, + selector, + property, + value: toScalar(value), + }) + } + } + }) + } + + if (attributes) { + Object.keys(attributes).forEach((attribute) => { + if (el.hasAttribute(attribute)) { + const value = el.getAttribute(attribute) + + technologies.push({ + name, + selector, + attribute, + value: toScalar(value), + }) + } + }) + } + }) + }) + + return technologies + }, []) + }, + Wappalyzer.technologies + .filter(({ dom }) => dom) + .map(({ name, dom }) => ({ name, dom })) + ) + ) + // Cookies const cookies = (await page.cookies()).reduce( (cookies, { name, value }) => ({ @@ -530,6 +633,7 @@ class Site { throw new Error('No response from server') } + this.onDetect(analyzeDom(dom)) this.onDetect(analyzeJs(js)) this.onDetect( diff --git a/src/drivers/webextension/images/icons/Crisp Live Chat.svg b/src/drivers/webextension/images/icons/Crisp Live Chat.svg new file mode 100644 index 000000000..3d2b750e4 --- /dev/null +++ b/src/drivers/webextension/images/icons/Crisp Live Chat.svg @@ -0,0 +1,3 @@ + + + diff --git a/src/technologies.json b/src/technologies.json index 79b65d384..43073583c 100644 --- a/src/technologies.json +++ b/src/technologies.json @@ -1682,7 +1682,7 @@ "cats": [ 19 ], - "description": "Auth0 headless browser sdk", + "description": "Auth0 provides authentication and authorization as a service.", "icon": "Auth0.png", "scripts": [ "/auth0(?:-js)?/([\\d.]+)/auth0(?:.min)?\\.js\\;version:\\1", @@ -1977,7 +1977,7 @@ "Blackbaud Luminate Online": { "cats": [ 41, - 51 + 32 ], "icon": "Blackbaud-Luminate-Online.png", "js": { @@ -1985,6 +1985,7 @@ }, "url": "/site/Donation2?.*df_id=", "scripts": "js/convio/modules\\.js", + "description": "Blackbaud Luminate Online provides online fundraising and marketing automation for nonprofits.", "website": "https://www.blackbaud.com/products/blackbaud-luminate-online" }, "Blade": { @@ -2758,6 +2759,7 @@ "js": { "Classy": "" }, + "description": "Classy is a class library for JavaScript applications.", "website": "https://www.classy.org/" }, "Clarity": { @@ -2784,6 +2786,7 @@ "scripts": [ "reveal\\.clearbit\\.com/v[(0-9)]/" ], + "description": "Clearbit Reveal identifies anonymous visitors to websites.", "website": "https://clearbit.com/reveal" }, "ClickFunnels": { @@ -4601,6 +4604,7 @@ ], "icon": "feedback-fish.svg", "scripts": "^https://feedback\\.fish/ff\\.js", + "description": "Feedback Fish is a widget for collecting website feedback from users.", "website": "https://feedback.fish" }, "Fedora": { @@ -4879,6 +4883,18 @@ "scripts": "analytics\\.freespee\\.com/js/external/fs\\.(?:min\\.)?js", "website": "https://www.freespee.com" }, + "Crisp Live Chat": { + "cats": [ + 52 + ], + "description": "Crisp Live Chat is a live chat solution with free and paid options.", + "icon": "Crisp Live Chat.svg", + "js": { + "$crisp": "", + "CRISP_WEBSITE_ID": "" + }, + "website": "https://crisp.chat/" + }, "Freshchat": { "cats": [ 52 @@ -10706,7 +10722,7 @@ "cats": [ 12 ], - "description": "Predictable state container for JavaScript apps", + "description": "Redux is a predictable state container for JavaScript applications.", "icon": "Redux.png", "scripts": [ "/redux(@|/)([\\d.]+)(?:/[a-z]+)?/redux(?:.min)?\\.js\\;version:\\2" @@ -10717,6 +10733,13 @@ "cats": [ 12 ], + "dom": { + "#root": { + "properties": { + "_reactRootContainer": "" + } + } + }, "cpe": "cpe:/a:facebook:react", "description": "React is an open-source JavaScript library for building user interfaces or UI components.", "html": "<[^>]+data-react", @@ -10751,7 +10774,7 @@ "cats": [ 12 ], - "description": "Declarative routing for React", + "description": "React Router provides declarative routing for React.", "icon": "React Router.png", "implies": "React", "scripts": [ diff --git a/src/wappalyzer.js b/src/wappalyzer.js index efc6d9a76..9083ad1d7 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -231,6 +231,7 @@ const Wappalyzer = { const { cats, url, + dom, html, css, robots, @@ -254,6 +255,7 @@ const Wappalyzer = { url: transform(url), headers: transform(headers), cookies: transform(cookies), + dom: transform(dom, true), html: transform(html), css: transform(css), certIssuer: transform(certIssuer), @@ -298,7 +300,7 @@ const Wappalyzer = { }, /** - * Extract information from regex pattern. + * Transform patterns for internal use. * @param {string|array} patterns */ transformPatterns(patterns, caseSensitive = false) { @@ -315,34 +317,7 @@ const Wappalyzer = { const parsed = Object.keys(patterns).reduce((parsed, key) => { parsed[caseSensitive ? key : key.toLowerCase()] = toArray( patterns[key] - ).map((pattern) => { - const { value, regex, confidence, version } = pattern - .split('\\;') - .reduce((attrs, attr, i) => { - if (i) { - // Key value pairs - attr = attr.split(':') - - if (attr.length > 1) { - attrs[attr.shift()] = attr.join(':') - } - } else { - attrs.value = attr - - // Escape slashes in regular expression - attrs.regex = new RegExp(attr.replace(/\//g, '\\/'), 'i') - } - - return attrs - }, {}) - - return { - value, - regex, - confidence: parseInt(confidence || 100, 10), - version: version || '', - } - }) + ).map((pattern) => Wappalyzer.parsePattern(pattern)) return parsed }, {}) @@ -350,6 +325,49 @@ const Wappalyzer = { return 'main' in parsed ? parsed.main : parsed }, + /** + * Extract information from regex pattern. + * @param {string|object} pattern + */ + parsePattern(pattern) { + if (typeof pattern === 'object') { + return Object.keys(pattern).reduce( + (parsed, key) => ({ + ...parsed, + [key]: Wappalyzer.parsePattern(pattern[key]), + }), + {} + ) + } else { + const { value, regex, confidence, version } = pattern + .split('\\;') + .reduce((attrs, attr, i) => { + if (i) { + // Key value pairs + attr = attr.split(':') + + if (attr.length > 1) { + attrs[attr.shift()] = attr.join(':') + } + } else { + attrs.value = attr + + // Escape slashes in regular expression + attrs.regex = new RegExp(attr.replace(/\//g, '\\/'), 'i') + } + + return attrs + }, {}) + + return { + value, + regex, + confidence: parseInt(confidence || 100, 10), + version: version || '', + } + } + }, + /** * @todo describe * @param {Object} technology @@ -400,12 +418,19 @@ const Wappalyzer = { * @param {String} type * @param {Array} items */ - analyzeManyToMany(technology, type, items = {}) { + analyzeManyToMany(technology, types, items = {}) { + const [type, ...subtypes] = types.split('.') + return Object.keys(technology[type]).reduce((technologies, key) => { const patterns = technology[type][key] || [] const values = items[key] || [] - patterns.forEach((pattern) => { + patterns.forEach((_pattern) => { + const pattern = (subtypes || []).reduce( + (pattern, subtype) => pattern[subtype], + _pattern + ) + values.forEach((value) => { if (pattern.regex.test(value)) { technologies.push({