From 27e9b2fcbbd387b1ae150c0b5db25efd73d704cd Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Wed, 10 Jun 2020 09:29:31 +1000 Subject: [PATCH] Refactoring --- bin/validate | 6 +- src/drivers/webextension/js/content.js | 20 +- src/drivers/webextension/js/driver.bak.js | 445 --------- src/drivers/webextension/js/driver.js | 113 +-- src/drivers/webextension/js/lib/iframe.js | 13 +- src/drivers/webextension/js/popup.bak.js | 333 ------- src/drivers/webextension/js/wappalyzer.bak.js | 727 -------------- src/drivers/webextension/manifest.json | 6 - src/wappalyzer.js | 928 +++++------------- 9 files changed, 339 insertions(+), 2252 deletions(-) delete mode 100644 src/drivers/webextension/js/driver.bak.js delete mode 100644 src/drivers/webextension/js/popup.bak.js delete mode 100644 src/drivers/webextension/js/wappalyzer.bak.js diff --git a/bin/validate b/bin/validate index 3e5007a71..67f567b51 100755 --- a/bin/validate +++ b/bin/validate @@ -16,6 +16,6 @@ echo "Validating icons..." ./bin/validate-icons -echo "Running tests..." - -yarn run test +# echo "Running tests..." +# +# yarn run test diff --git a/src/drivers/webextension/js/content.js b/src/drivers/webextension/js/content.js index 405be35d6..a1deeb4a6 100644 --- a/src/drivers/webextension/js/content.js +++ b/src/drivers/webextension/js/content.js @@ -3,11 +3,19 @@ /* globals chrome */ const Content = { - port: chrome.runtime.connect({ name: 'content.js' }), - async init() { await new Promise((resolve) => setTimeout(resolve, 1000)) + Content.port = chrome.runtime.connect({ name: 'content.js' }) + + Content.port.onMessage.addListener(({ func, args }) => { + const onFunc = `on${func.charAt(0).toUpperCase() + func.slice(1)}` + + if (Content[onFunc]) { + Content[onFunc](args) + } + }) + try { // HTML let html = new XMLSerializer().serializeToString(document) @@ -100,14 +108,6 @@ const Content = { } } -Content.port.onMessage.addListener(({ func, args }) => { - const onFunc = `on${func.charAt(0).toUpperCase() + func.slice(1)}` - - if (Content[onFunc]) { - Content[onFunc](args) - } -}) - if (/complete|interactive|loaded/.test(document.readyState)) { Content.init() } else { diff --git a/src/drivers/webextension/js/driver.bak.js b/src/drivers/webextension/js/driver.bak.js deleted file mode 100644 index 0ff7d491c..000000000 --- a/src/drivers/webextension/js/driver.bak.js +++ /dev/null @@ -1,445 +0,0 @@ -/** - * WebExtension driver - */ - -/* eslint-env browser */ -/* global browser, chrome, Wappalyzer */ - -/** global: browser */ -/** global: chrome */ -/** global: fetch */ -/** global: Wappalyzer */ - -const wappalyzer = new Wappalyzer() - -const tabCache = {} -const robotsTxtQueue = {} - -let categoryOrder = [] - -browser.tabs.onRemoved.addListener((tabId) => { - tabCache[tabId] = null -}) - -function userAgent() { - const url = chrome.extension.getURL('/') - - if (url.startsWith('moz-')) { - return 'firefox' - } - - if (url.startsWith('ms-browser')) { - return 'edge' - } - - return 'chrome' -} - -/** - * Get a value from localStorage - */ -function getOption(name, defaultValue = null) { - return new Promise(async (resolve, reject) => { - let value = defaultValue - - try { - const option = await browser.storage.local.get(name) - - if (option[name] !== undefined) { - value = option[name] - } - } catch (error) { - wappalyzer.log(error.message, 'driver', 'error') - - return reject(error.message) - } - - return resolve(value) - }) -} - -/** - * Set a value in localStorage - */ -function setOption(name, value) { - return new Promise(async (resolve, reject) => { - try { - await browser.storage.local.set({ [name]: value }) - } catch (error) { - wappalyzer.log(error.message, 'driver', 'error') - - return reject(error.message) - } - - return resolve() - }) -} - -/** - * Open a tab - */ -function openTab(args) { - browser.tabs.create({ - url: args.url, - active: args.background === undefined || !args.background - }) -} - -/** - * Make a POST request - */ -async function post(url, body) { - try { - const response = await fetch(url, { - method: 'POST', - body: JSON.stringify(body) - }) - - wappalyzer.log(`POST ${url}: ${response.status}`, 'driver') - } catch (error) { - wappalyzer.log(`POST ${url}: ${error}`, 'driver', 'error') - } -} - -// Capture response headers -browser.webRequest.onCompleted.addListener( - async (request) => { - const headers = {} - - if (request.responseHeaders) { - const url = wappalyzer.parseUrl(request.url) - - let tab - - try { - ;[tab] = await browser.tabs.query({ url: [url.href] }) - } catch (error) { - wappalyzer.log(error, 'driver', 'error') - } - - if (tab) { - request.responseHeaders.forEach((header) => { - const name = header.name.toLowerCase() - - headers[name] = headers[name] || [] - - headers[name].push( - (header.value || header.binaryValue || '').toString() - ) - }) - - if ( - headers['content-type'] && - /\/x?html/.test(headers['content-type'][0]) - ) { - wappalyzer.analyze(url, { headers }, { tab }) - } - } - } - }, - { urls: ['http://*/*', 'https://*/*'], types: ['main_frame'] }, - ['responseHeaders'] -) - -browser.runtime.onConnect.addListener((port) => { - port.onMessage.addListener(async (message) => { - if (message.id === undefined) { - return - } - - if (message.id !== 'log') { - wappalyzer.log(`Message from ${port.name}: ${message.id}`, 'driver') - } - - const pinnedCategory = await getOption('pinnedCategory') - - const url = wappalyzer.parseUrl(port.sender.tab ? port.sender.tab.url : '') - - const cookies = await browser.cookies.getAll({ - domain: `.${url.hostname}` - }) - - let response - - switch (message.id) { - case 'log': - wappalyzer.log(message.subject, message.source) - - break - case 'init': - wappalyzer.analyze(url, { cookies }, { tab: port.sender.tab }) - - break - case 'analyze': - if (message.subject.html) { - browser.i18n - .detectLanguage(message.subject.html) - .then(({ languages }) => { - const language = languages - .filter(({ percentage }) => percentage >= 75) - .map(({ language: lang }) => lang)[0] - - message.subject.language = language - - wappalyzer.analyze(url, message.subject, { tab: port.sender.tab }) - }) - } else { - wappalyzer.analyze(url, message.subject, { tab: port.sender.tab }) - } - - await setOption('hostnameCache', wappalyzer.hostnameCache) - - break - case 'ad_log': - wappalyzer.cacheDetectedAds(message.subject) - - break - case 'get_apps': - response = { - tabCache: tabCache[message.tab.id], - apps: wappalyzer.apps, - categories: wappalyzer.categories, - pinnedCategory, - termsAccepted: - userAgent() === 'chrome' || - (await getOption('termsAccepted', false)) - } - - break - case 'set_option': - await setOption(message.key, message.value) - - break - case 'get_js_patterns': - response = { - patterns: wappalyzer.jsPatterns - } - - break - case 'update_theme_mode': - // Sync theme mode to popup. - response = { - themeMode: await getOption('themeMode', false) - } - - break - default: - // Do nothing - } - - if (response) { - port.postMessage({ - id: message.id, - response - }) - } - }) -}) - -wappalyzer.driver.document = document - -/** - * Log messages to console - */ -wappalyzer.driver.log = (message, source, type) => { - const log = ['warn', 'error'].includes(type) ? type : 'log' - - console[log](`[wappalyzer ${type}]`, `[${source}]`, message) // eslint-disable-line no-console -} - -/** - * Display apps - */ -wappalyzer.driver.displayApps = async (detected, meta, context) => { - const { tab } = context - - if (tab === undefined) { - return - } - - tabCache[tab.id] = tabCache[tab.id] || { - detected: [] - } - - tabCache[tab.id].detected = detected - - const pinnedCategory = await getOption('pinnedCategory') - const dynamicIcon = await getOption('dynamicIcon', true) - - let found = false - - // Find the main application to display - ;[pinnedCategory].concat(categoryOrder).forEach((match) => { - Object.keys(detected).forEach((appName) => { - const app = detected[appName] - - app.props.cats.forEach((category) => { - if (category === match && !found) { - let icon = - app.props.icon && dynamicIcon ? app.props.icon : 'default.svg' - - if (/\.svg$/i.test(icon)) { - icon = `converted/${icon.replace(/\.svg$/, '.png')}` - } - - try { - browser.pageAction.setIcon({ - tabId: tab.id, - path: `../images/icons/${icon}` - }) - } catch (e) { - // Firefox for Android does not support setIcon see https://bugzilla.mozilla.org/show_bug.cgi?id=1331746 - } - - found = true - } - }) - }) - }) - - browser.pageAction.show(tab.id) -} - -/** - * Fetch and cache robots.txt for host - */ -wappalyzer.driver.getRobotsTxt = async (host, secure = false) => { - if (robotsTxtQueue[host]) { - return robotsTxtQueue[host] - } - - const tracking = await getOption('tracking', true) - const robotsTxtCache = await getOption('robotsTxtCache', {}) - - robotsTxtQueue[host] = new Promise(async (resolve) => { - if (!tracking) { - return resolve([]) - } - - if (host in robotsTxtCache) { - return resolve(robotsTxtCache[host]) - } - - const timeout = setTimeout(() => resolve([]), 3000) - - let response - - try { - response = await fetch(`http${secure ? 's' : ''}://${host}/robots.txt`, { - redirect: 'follow', - mode: 'no-cors' - }) - } catch (error) { - wappalyzer.log(error, 'driver', 'error') - - return resolve([]) - } - - clearTimeout(timeout) - - const robotsTxt = response.ok ? await response.text() : '' - - robotsTxtCache[host] = Wappalyzer.parseRobotsTxt(robotsTxt) - - await setOption('robotsTxtCache', robotsTxtCache) - - delete robotsTxtQueue[host] - - return resolve(robotsTxtCache[host]) - }) - - return robotsTxtQueue[host] -} - -/** - * Anonymously track detected applications for research purposes - */ -wappalyzer.driver.ping = async ( - hostnameCache = { expires: 0, hostnames: {} }, - adCache = [] -) => { - const tracking = await getOption('tracking', true) - const termsAccepted = - userAgent() === 'chrome' || (await getOption('termsAccepted', false)) - - if (tracking && termsAccepted) { - if ( - hostnameCache.hostnames && - Object.keys(hostnameCache.hostnames).length - ) { - post('https://api.wappalyzer.com/ping/v1/', hostnameCache.hostnames) - } - - if (adCache.length) { - post('https://ad.wappalyzer.com/log/wp/', adCache) - } - - await setOption('robotsTxtCache', {}) - } -} - -// Init -;(async () => { - // Technologies - try { - const response = await fetch('../apps.json') - const json = await response.json() - - wappalyzer.apps = json.apps - wappalyzer.categories = json.categories - } catch (error) { - wappalyzer.log(`GET apps.json: ${error.message}`, 'driver', 'error') - } - - wappalyzer.parseJsPatterns() - - categoryOrder = Object.keys(wappalyzer.categories) - .map((categoryId) => parseInt(categoryId, 10)) - .sort( - (a, b) => - wappalyzer.categories[a].priority - wappalyzer.categories[b].priority - ) - - // Version check - const { version } = browser.runtime.getManifest() - const previousVersion = await getOption('version') - const upgradeMessage = await getOption('upgradeMessage', true) - - if (previousVersion === null) { - openTab({ - url: `${wappalyzer.config.websiteURL}installed` - }) - } else if (version !== previousVersion && upgradeMessage) { - openTab({ - url: `${wappalyzer.config.websiteURL}upgraded?v${version}`, - background: true - }) - } - - await setOption('version', version) - - // Hostname cache - wappalyzer.hostnameCache = await getOption('hostnameCache', { - expires: Date.now() + 1000 * 60 * 60 * 24, - hostnames: {} - }) - - // Run content script on all tabs - try { - const tabs = await browser.tabs.query({ - url: ['http://*/*', 'https://*/*'] - }) - - tabs.forEach(async (tab) => { - try { - await browser.tabs.executeScript(tab.id, { - file: '../js/content.js' - }) - } catch (error) { - // - } - }) - } catch (error) { - wappalyzer.log(error, 'driver', 'error') - } -})() diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index 7daf679af..d83d12b94 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -17,6 +17,8 @@ const Driver = { lastPing: Date.now(), async init() { + chrome.runtime.onConnect.addListener(Driver.onRuntimeConnect) + await Driver.loadTechnologies() const hostnameCache = (await getOption('hostnames')) || {} @@ -31,8 +33,7 @@ const Driver = { ({ pattern: { regex, confidence, version }, match, - technology: name, - hits + technology: name }) => ({ pattern: { regex: new RegExp(regex, 'i'), @@ -42,8 +43,7 @@ const Driver = { match, technology: Wappalyzer.technologies.find( ({ name: _name }) => name === _name - ), - hits + ) }) ) } @@ -55,7 +55,6 @@ const Driver = { ads: (await getOption('ads')) || [] } - chrome.runtime.onConnect.addListener(Driver.onRuntimeConnect) chrome.webRequest.onCompleted.addListener( Driver.onWebRequestComplete, { urls: ['http://*/*', 'https://*/*'], types: ['main_frame'] }, @@ -126,6 +125,8 @@ const Driver = { }, onRuntimeConnect(port) { + Driver.log(`Connected to ${port.name}`) + port.onMessage.addListener(async ({ func, args }) => { if (!func) { return @@ -190,7 +191,7 @@ const Driver = { domain: `.${url.hostname}` }) - await Driver.onDetect(url, await analyze(href, items), language) + await Driver.onDetect(url, await analyze(href, items), language, true) } catch (error) { Driver.error(error) } @@ -200,40 +201,35 @@ const Driver = { return Wappalyzer.technologies }, - async onDetect(url, detections = [], language) { + async onDetect(url, detections = [], language, incrementHits = false) { + if (!detections.length) { + return + } + + const { hostname, href } = url + // Cache detections - // eslint-disable-next-line standard/computed-property-even-spacing - Driver.cache.hostnames[url.hostname] = { - ...(Driver.cache.hostnames[url.hostname] || { - detections: [] + const cache = (Driver.cache.hostnames[hostname] = { + ...(Driver.cache.hostnames[hostname] || { + detections: [], + hits: 0 }), dateTime: Date.now() - } - - Driver.cache.hostnames[url.hostname].language = - Driver.cache.hostnames[url.hostname].language || language + }) - detections.forEach((detection) => { - const foo = Driver.cache.hostnames[url.hostname].detections - const { - technology: { name }, - pattern: { regex } - } = detection + // Remove duplicates + cache.detections = cache.detections = cache.detections.concat(detections) - const cache = foo.find( - ({ technology: { name: _name }, pattern: { regex: _regex } }) => - name === _name && (!regex || regex) === _regex - ) + cache.detections.filter( + ({ technology: { name }, pattern: { regex } }, index) => + cache.detections.findIndex( + ({ technology: { name: _name }, pattern: { regex: _regex } }) => + name === _name && (!regex || regex.toString() === _regex.toString()) + ) === index + ) - if (cache) { - cache.hits += 1 - } else { - foo.push({ - ...detection, - hits: 1 - }) - } - }) + cache.hits += incrementHits ? 1 : 0 + cache.language = cache.language || language // Expire cache Driver.cache.hostnames = Object.keys(Driver.cache.hostnames).reduce( @@ -277,14 +273,16 @@ const Driver = { ) ) - const resolved = resolve(Driver.cache.hostnames[url.hostname].detections) + const resolved = resolve(Driver.cache.hostnames[hostname].detections) await Driver.setIcon(url, resolved) - const tabs = await promisify(chrome.tabs, 'query', { url: [url.href] }) + const tabs = await promisify(chrome.tabs, 'query', { url: [href] }) tabs.forEach(({ id }) => (Driver.cache.tabs[id] = resolved)) + Driver.log({ hostname, technologies: resolved }) + await Driver.ping() }, @@ -306,14 +304,13 @@ const Driver = { categories.some(({ id }) => id === pinnedCategory) ) - ;({ icon } = - pinned || + ;({ icon } = pinned || technologies.sort(({ categories: a }, { categories: b }) => { const max = (value) => value.reduce((max, { priority }) => Math.max(max, priority)) return max(a) > max(b) ? -1 : 1 - })[0]) + })[0] || { icon }) } const tabs = await promisify(chrome.tabs, 'query', { url: [url.href] }) @@ -437,36 +434,34 @@ const Driver = { if (tracking && termsAccepted) { const count = Object.keys(Driver.cache.hostnames).length - if (count && (count >= 50 || Driver.lastPing < Date.now() - 5000)) { + if (count && (count >= 50 || Driver.lastPing < Date.now() - expiry)) { await Driver.post( 'https://api.wappalyzer.com/ping/v1/', Object.keys(Driver.cache.hostnames).reduce((hostnames, hostname) => { - const { language, detections } = Driver.cache.hostnames[hostname] + // eslint-disable-next-line standard/computed-property-even-spacing + const { language, detections, hits } = Driver.cache.hostnames[ + hostname + ] hostnames[hostname] = hostnames[hostname] || { - applications: {}, + applications: resolve(detections).reduce( + (technologies, { name, confidence, version }) => { + if (confidence === 100) { + technologies[name] = { + version, + hits + } + + return technologies + } + }, + {} + ), meta: { language } } - resolve(detections).forEach(({ name, confidence, version }) => { - if (confidence === 100) { - console.log( - name, - detections.find( - ({ technology: { name: _name } }) => name === _name - ) - ) - hostnames[hostname].applications[name] = { - version, - hits: detections.find( - ({ technology: { name: _name } }) => name === _name - ).pattern.hits - } - } - }) - return hostnames }, {}) ) diff --git a/src/drivers/webextension/js/lib/iframe.js b/src/drivers/webextension/js/lib/iframe.js index 38ea3af38..9fd152494 100644 --- a/src/drivers/webextension/js/lib/iframe.js +++ b/src/drivers/webextension/js/lib/iframe.js @@ -124,12 +124,12 @@ var exports = {}; var port = chrome.runtime.connect({name:"adparser"}); port.onMessage.addListener((message) => { - if ( message && message.tracking_enabled ) { - - utilCallback(); - } else { - - utilElseCallback(); + if ( message && typeof message.tracking_enabled !== 'undefined' ) { + if (message.tracking_enabled) { + utilCallback(); + } else { + utilElseCallback(); + } } }); @@ -1111,7 +1111,6 @@ var exports = {}; if ( origUrl.indexOf('google.com/_/chrome/newtab') === -1 ) { var onBlockedRobotsMessage = function() { - return // TODO var log; log = _logGen.log('invalid-robotstxt', []); log.doc.finalPageUrl = log.doc.url; diff --git a/src/drivers/webextension/js/popup.bak.js b/src/drivers/webextension/js/popup.bak.js deleted file mode 100644 index 251df8200..000000000 --- a/src/drivers/webextension/js/popup.bak.js +++ /dev/null @@ -1,333 +0,0 @@ -'use strict' -/* eslint-env browser */ -/* globals chrome */ - -let pinnedCategory = null -let termsAccepted = false - -const port = chrome.runtime.connect({ - name: 'popup.js' -}) - -function slugify(string) { - return string - .toLowerCase() - .replace(/[^a-z0-9-]/g, '-') - .replace(/--+/g, '-') - .replace(/(?:^-|-$)/, '') -} - -function i18n() { - const nodes = document.querySelectorAll('[data-i18n]') - - Array.prototype.forEach.call(nodes, (node) => { - node.innerHTML = browser.i18n.getMessage(node.dataset.i18n) - }) -} - -function replaceDom(domTemplate) { - const container = document.getElementsByClassName('container')[0] - - while (container.firstChild) { - container.removeChild(container.firstChild) - } - - container.appendChild(jsonToDOM(domTemplate, document, {})) - - i18n() - - Array.from( - document.querySelectorAll('.detected__category-pin-wrapper') - ).forEach((pin) => { - pin.addEventListener('click', () => { - const categoryId = parseInt(pin.dataset.categoryId, 10) - - if (categoryId === pinnedCategory) { - pin.className = 'detected__category-pin-wrapper' - - pinnedCategory = null - } else { - const active = document.querySelector( - '.detected__category-pin-wrapper--active' - ) - - if (active) { - active.className = 'detected__category-pin-wrapper' - } - - pin.className = - 'detected__category-pin-wrapper detected__category-pin-wrapper--active' - - pinnedCategory = categoryId - } - - port.postMessage({ - id: 'set_option', - key: 'pinnedCategory', - value: pinnedCategory - }) - }) - }) - - Array.from(document.querySelectorAll('a')).forEach((link) => { - link.addEventListener('click', () => { - browser.tabs.create({ url: link.href }) - - return false - }) - }) -} - -function replaceDomWhenReady(dom) { - if (/complete|interactive|loaded/.test(document.readyState)) { - replaceDom(dom) - } else { - document.addEventListener('DOMContentLoaded', () => { - replaceDom(dom) - }) - } -} - -function appsToDomTemplate(response) { - let template = [] - - if (response.tabCache && Object.keys(response.tabCache.detected).length > 0) { - const categories = {} - - // Group apps by category - for (const appName in response.tabCache.detected) { - response.apps[appName].cats.forEach((cat) => { - categories[cat] = categories[cat] || { - name: response.categories[cat].name, - apps: [] - } - - categories[cat].apps[appName] = appName - }) - } - - for (const cat in categories) { - const apps = [] - - for (const appName in categories[cat].apps) { - const { confidenceTotal, version } = response.tabCache.detected[appName] - - apps.push([ - 'a', - { - class: 'detected__app', - href: `https://www.wappalyzer.com/technologies/${slugify( - categories[cat].name - )}/${slugify(appName)}` - }, - [ - 'img', - { - class: 'detected__app-icon', - src: `../images/icons/${response.apps[appName].icon || - 'default.svg'}` - } - ], - [ - 'span', - { - class: 'detected__app-name' - }, - appName - ], - version - ? [ - 'span', - { - class: 'detected__app-version' - }, - version - ] - : null, - confidenceTotal < 100 - ? [ - 'span', - { - class: 'detected__app-confidence' - }, - `${confidenceTotal}% sure` - ] - : null - ]) - } - - template.push([ - 'div', - { - class: 'detected__category' - }, - [ - 'div', - { - class: 'detected__category-name' - }, - [ - 'a', - { - class: 'detected__category-link', - href: `https://www.wappalyzer.com/categories/${slugify( - response.categories[cat].name - )}` - }, - browser.i18n.getMessage(`categoryName${cat}`) - ], - [ - 'span', - { - class: `detected__category-pin-wrapper${ - parseInt(pinnedCategory, 10) === parseInt(cat, 10) - ? ' detected__category-pin-wrapper--active' - : '' - }`, - 'data-category-id': cat, - title: browser.i18n.getMessage('categoryPin') - }, - [ - 'img', - { - class: 'detected__category-pin detected__category-pin--active', - src: '../images/pin-active.svg' - } - ], - [ - 'img', - { - class: - 'detected__category-pin detected__category-pin--inactive', - src: '../images/pin.svg' - } - ] - ] - ], - [ - 'div', - { - class: 'detected__apps' - }, - apps - ] - ]) - } - - template = [ - 'div', - { - class: 'detected' - }, - template - ] - } else { - template = [ - 'div', - { - class: 'empty' - }, - [ - 'span', - { - class: 'empty__text' - }, - browser.i18n.getMessage('noAppsDetected') - ] - ] - } - - return template -} - -async function getApps() { - try { - const tabs = await browser.tabs.query({ - active: true, - currentWindow: true - }) - - const url = new URL(tabs[0].url) - - document.querySelector( - '.footer__link' - ).href = `https://www.wappalyzer.com/alerts/manage?url=${encodeURIComponent( - `${url.protocol}//${url.hostname}` - )}` - - port.postMessage({ - id: 'get_apps', - tab: tabs[0] - }) - } catch (error) { - console.error(error) // eslint-disable-line no-console - } -} - -/** - * Async function to update body class based on option. - */ -function getThemeMode() { - try { - port.postMessage({ - id: 'update_theme_mode' - }) - } catch (error) { - console.error(error) // eslint-disable-line no-console - } -} - -/** - * Update theme mode based on browser option. - * @param {object} res Response from port listener. - */ -function updateThemeMode(res) { - if (res.hasOwnProperty('themeMode') && res.themeMode !== false) { - document.body.classList.add('theme-mode-sync') - } -} - -function displayApps(response) { - pinnedCategory = response.pinnedCategory // eslint-disable-line prefer-destructuring - termsAccepted = response.termsAccepted // eslint-disable-line prefer-destructuring - - if (termsAccepted) { - replaceDomWhenReady(appsToDomTemplate(response)) - } else { - i18n() - - const wrapper = document.querySelector('.terms__wrapper') - - document.querySelector('.terms__accept').addEventListener('click', () => { - port.postMessage({ - id: 'set_option', - key: 'termsAccepted', - value: true - }) - - wrapper.classList.remove('terms__wrapper--active') - - getApps() - }) - - wrapper.classList.add('terms__wrapper--active') - } -} - -port.onMessage.addListener((message) => { - switch (message.id) { - case 'get_apps': - displayApps(message.response) - - break - case 'update_theme_mode': - updateThemeMode(message.response) - - break - default: - // Do nothing - } -}) - -getThemeMode() -getApps() diff --git a/src/drivers/webextension/js/wappalyzer.bak.js b/src/drivers/webextension/js/wappalyzer.bak.js deleted file mode 100644 index e69399e87..000000000 --- a/src/drivers/webextension/js/wappalyzer.bak.js +++ /dev/null @@ -1,727 +0,0 @@ -const validation = { - hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/, - hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/ -} - -/** - * Enclose string in array - */ -function asArray(value) { - return Array.isArray(value) ? value : [value] -} - -/** - * - */ -function asyncForEach(iterable, iterator) { - return Promise.all( - (iterable || []).map( - (item) => - new Promise((resolve) => setTimeout(() => resolve(iterator(item)), 1)) - ) - ) -} - -/** - * Mark application as detected, set confidence and version - */ -function addDetected(app, pattern, type, value, key) { - app.detected = true - - // Set confidence level - app.confidence[`${type} ${key ? `${key} ` : ''}${pattern.regex}`] = - pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10) - - // Detect version number - if (pattern.version) { - const versions = [] - const matches = pattern.regex.exec(value) - - let { version } = pattern - - if (matches) { - matches.forEach((match, i) => { - // Parse ternary operator - const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version) - - if (ternary && ternary.length === 3) { - version = version.replace(ternary[0], match ? ternary[1] : ternary[2]) - } - - // Replace back references - version = version - .trim() - .replace(new RegExp(`\\\\${i}`, 'g'), match || '') - }) - - if (version && !versions.includes(version)) { - versions.push(version) - } - - if (versions.length) { - // Use the longest detected version number - app.version = versions.reduce((a, b) => (a.length > b.length ? a : b)) - } - } - } -} - -function resolveExcludes(apps, detected) { - const excludes = [] - const detectedApps = Object.assign({}, apps, detected) - - // Exclude app in detected apps only - Object.keys(detectedApps).forEach((appName) => { - const app = detectedApps[appName] - - if (app.props.excludes) { - asArray(app.props.excludes).forEach((excluded) => { - excludes.push(excluded) - }) - } - }) - - // Remove excluded applications - Object.keys(apps).forEach((appName) => { - if (excludes.includes(appName)) { - delete apps[appName] - } - }) -} - -class Application { - constructor(name, props, detected) { - this.confidence = {} - this.confidenceTotal = 0 - this.detected = Boolean(detected) - this.excludes = [] - this.name = name - this.props = props - this.version = '' - } - - /** - * Calculate confidence total - */ - getConfidence() { - let total = 0 - - Object.keys(this.confidence).forEach((id) => { - total += this.confidence[id] - }) - - this.confidenceTotal = Math.min(total, 100) - - return this.confidenceTotal - } -} - -class Wappalyzer { - constructor() { - this.apps = {} - this.categories = {} - this.driver = {} - this.jsPatterns = {} - this.detected = {} - this.hostnameCache = { - expires: Date.now() + 1000 * 60 * 60 * 24, - hostnames: {} - } - this.adCache = [] - - this.config = { - websiteURL: 'https://www.wappalyzer.com/', - twitterURL: 'https://twitter.com/Wappalyzer', - githubURL: 'https://github.com/AliasIO/Wappalyzer' - } - } - - /** - * Log messages to console - */ - log(message, source, type) { - if (this.driver.log) { - this.driver.log(message, source || '', type || 'debug') - } - } - - analyze(url, data, context) { - const apps = {} - const promises = [] - const startTime = new Date() - const { scripts, cookies, headers, js } = data - - let { html } = data - - if (this.detected[url.canonical] === undefined) { - this.detected[url.canonical] = {} - } - - const metaTags = [] - - // Additional information - let language = null - - if (html) { - if (typeof html !== 'string') { - html = '' - } - - let matches = data.html.match( - new RegExp(']*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i') - ) - - language = matches && matches.length ? matches[1] : data.language || null - - // Meta tags - const regex = /]+>/gi - - do { - matches = regex.exec(html) - - if (!matches) { - break - } - - metaTags.push(matches[0]) - } while (matches) - } - - Object.keys(this.apps).forEach((appName) => { - apps[appName] = - this.detected[url.canonical] && this.detected[url.canonical][appName] - ? this.detected[url.canonical][appName] - : new Application(appName, this.apps[appName]) - - const app = apps[appName] - - promises.push(this.analyzeUrl(app, url)) - - if (html) { - promises.push(this.analyzeHtml(app, html)) - promises.push(this.analyzeMeta(app, metaTags)) - } - - if (scripts) { - promises.push(this.analyzeScripts(app, scripts)) - } - - if (cookies) { - promises.push(this.analyzeCookies(app, cookies)) - } - - if (headers) { - promises.push(this.analyzeHeaders(app, headers)) - } - }) - - if (js) { - Object.keys(js).forEach((appName) => { - if (typeof js[appName] !== 'function') { - promises.push(this.analyzeJs(apps[appName], js[appName])) - } - }) - } - - return new Promise(async (resolve) => { - await Promise.all(promises) - - Object.keys(apps).forEach((appName) => { - const app = apps[appName] - - if (!app.detected || !app.getConfidence()) { - delete apps[app.name] - } - }) - - resolveExcludes(apps, this.detected[url]) - this.resolveImplies(apps, url.canonical) - - this.cacheDetectedApps(apps, url.canonical) - this.trackDetectedApps(apps, url, language) - - this.log( - `Processing ${Object.keys(data).join(', ')} took ${( - (new Date() - startTime) / - 1000 - ).toFixed(2)}s (${url.hostname})`, - 'core' - ) - - if (Object.keys(apps).length) { - this.log( - `Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, - 'core' - ) - } - - this.driver.displayApps( - this.detected[url.canonical], - { language }, - context - ) - - return resolve() - }) - } - - /** - * Cache detected ads - */ - cacheDetectedAds(ad) { - this.adCache.push(ad) - } - - /** - * - */ - robotsTxtAllows(url) { - return new Promise(async (resolve, reject) => { - const parsed = this.parseUrl(url) - - if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { - return reject() - } - - const robotsTxt = await this.driver.getRobotsTxt( - parsed.host, - parsed.protocol === 'https:' - ) - - if ( - robotsTxt.some( - (disallowedPath) => parsed.pathname.indexOf(disallowedPath) === 0 - ) - ) { - return reject() - } - - return resolve() - }) - } - - /** - * Parse a URL - */ - parseUrl(url) { - const a = this.driver.document.createElement('a') - - a.href = url - - a.canonical = `${a.protocol}//${a.host}${a.pathname}` - - return a - } - - /** - * - */ - static parseRobotsTxt(robotsTxt) { - const disallow = [] - - let userAgent - - robotsTxt.split('\n').forEach((line) => { - let matches = /^User-agent:\s*(.+)$/i.exec(line.trim()) - - if (matches) { - userAgent = matches[1].toLowerCase() - } else if (userAgent === '*' || userAgent === 'wappalyzer') { - matches = /^Disallow:\s*(.+)$/i.exec(line.trim()) - - if (matches) { - disallow.push(matches[1]) - } - } - }) - - return disallow - } - - /** - * - */ - ping() { - if ( - !this.hostnameCache.hostnames || - Object.keys(this.hostnameCache.hostnames).length > 50 || - this.hostnameCache.expires < Date.now() - ) { - this.driver.ping(this.hostnameCache) - - this.hostnameCache = { - expires: Date.now() + 1000 * 60 * 60 * 24, - hostnames: {} - } - } - - if (this.adCache.length > 50) { - this.driver.ping(undefined, this.adCache) - - this.adCache = [] - } - } - - /** - * Parse apps.json patterns - */ - parsePatterns(patterns) { - if (!patterns) { - return [] - } - - let parsed = {} - - // Convert string to object containing array containing string - if (typeof patterns === 'string' || Array.isArray(patterns)) { - patterns = { - main: asArray(patterns) - } - } - - Object.keys(patterns).forEach((key) => { - parsed[key] = [] - - asArray(patterns[key]).forEach((pattern) => { - const attrs = {} - - pattern.split('\\;').forEach((attr, i) => { - if (i) { - // Key value pairs - attr = attr.split(':') - - if (attr.length > 1) { - attrs[attr.shift()] = attr.join(':') - } - } else { - attrs.string = attr - - try { - attrs.regex = new RegExp(attr.replace('/', '/'), 'i') // Escape slashes in regular expression - } catch (error) { - attrs.regex = new RegExp() - - this.log(`${error.message}: ${attr}`, 'error', 'core') - } - } - }) - - parsed[key].push(attrs) - }) - }) - - // Convert back to array if the original pattern list was an array (or string) - if ('main' in parsed) { - parsed = parsed.main - } - - return parsed - } - - /** - * Parse JavaScript patterns - */ - parseJsPatterns() { - Object.keys(this.apps).forEach((appName) => { - if (this.apps[appName].js) { - this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js) - } - }) - } - - resolveImplies(apps, url) { - let checkImplies = true - - const resolve = (appName) => { - const app = apps[appName] - - if (app && app.props.implies) { - asArray(app.props.implies).forEach((implied) => { - ;[implied] = this.parsePatterns(implied) - - if (!this.apps[implied.string]) { - this.log( - `Implied application ${implied.string} does not exist`, - 'core', - 'warn' - ) - - return - } - - if (!(implied.string in apps)) { - apps[implied.string] = - this.detected[url] && this.detected[url][implied.string] - ? this.detected[url][implied.string] - : new Application( - implied.string, - this.apps[implied.string], - true - ) - - checkImplies = true - } - - // Apply app confidence to implied app - Object.keys(app.confidence).forEach((id) => { - apps[implied.string].confidence[`${id} implied by ${appName}`] = - app.confidence[id] * - (implied.confidence === undefined ? 1 : implied.confidence / 100) - }) - }) - } - } - - // Implied applications - // Run several passes as implied apps may imply other apps - while (checkImplies) { - checkImplies = false - - Object.keys(apps).forEach(resolve) - } - } - - /** - * Cache detected applications - */ - cacheDetectedApps(apps, url) { - Object.keys(apps).forEach((appName) => { - const app = apps[appName] - - // Per URL - this.detected[url][appName] = app - - Object.keys(app.confidence).forEach((id) => { - this.detected[url][appName].confidence[id] = app.confidence[id] - }) - }) - - if (this.driver.ping instanceof Function) { - this.ping() - } - } - - /** - * Track detected applications - */ - trackDetectedApps(apps, url, language) { - if (!(this.driver.ping instanceof Function)) { - return - } - - const hostname = `${url.protocol}//${url.hostname}` - - Object.keys(apps).forEach((appName) => { - const app = apps[appName] - - if (this.detected[url.canonical][appName].getConfidence() >= 100) { - if ( - validation.hostname.test(url.hostname) && - !validation.hostnameBlacklist.test(url.hostname) - ) { - if (!(hostname in this.hostnameCache.hostnames)) { - this.hostnameCache.hostnames[hostname] = { - applications: {}, - meta: {} - } - } - - if ( - !(appName in this.hostnameCache.hostnames[hostname].applications) - ) { - this.hostnameCache.hostnames[hostname].applications[appName] = { - hits: 0 - } - } - - this.hostnameCache.hostnames[hostname].applications[appName].hits += 1 - - if (apps[appName].version) { - this.hostnameCache.hostnames[hostname].applications[ - appName - ].version = app.version - } - } - } - }) - - if (hostname in this.hostnameCache.hostnames) { - this.hostnameCache.hostnames[hostname].meta.language = language - } - - this.ping() - } - - /** - * Analyze URL - */ - analyzeUrl(app, url) { - const patterns = this.parsePatterns(app.props.url) - - if (!patterns.length) { - return Promise.resolve() - } - - return asyncForEach(patterns, (pattern) => { - if (pattern.regex.test(url.canonical)) { - addDetected(app, pattern, 'url', url.canonical) - } - }) - } - - /** - * Analyze HTML - */ - analyzeHtml(app, html) { - const patterns = this.parsePatterns(app.props.html) - - if (!patterns.length) { - return Promise.resolve() - } - - return asyncForEach(patterns, (pattern) => { - if (pattern.regex.test(html)) { - addDetected(app, pattern, 'html', html) - } - }) - } - - /** - * Analyze script tag - */ - analyzeScripts(app, scripts) { - const patterns = this.parsePatterns(app.props.script) - - if (!patterns.length) { - return Promise.resolve() - } - - return asyncForEach(patterns, (pattern) => { - scripts.forEach((uri) => { - if (pattern.regex.test(uri)) { - addDetected(app, pattern, 'script', uri) - } - }) - }) - } - - /** - * Analyze meta tag - */ - analyzeMeta(app, metaTags) { - const patterns = this.parsePatterns(app.props.meta) - const promises = [] - - if (!app.props.meta) { - return Promise.resolve() - } - - metaTags.forEach((match) => { - Object.keys(patterns).forEach((meta) => { - const r = new RegExp(`(?:name|property)=["']${meta}["']`, 'i') - - if (r.test(match)) { - const content = match.match(/content=("|')([^"']+)("|')/i) - - promises.push( - asyncForEach(patterns[meta], (pattern) => { - if ( - content && - content.length === 4 && - pattern.regex.test(content[2]) - ) { - addDetected(app, pattern, 'meta', content[2], meta) - } - }) - ) - } - }) - }) - - return Promise.all(promises) - } - - /** - * Analyze response headers - */ - analyzeHeaders(app, headers) { - const patterns = this.parsePatterns(app.props.headers) - const promises = [] - - Object.keys(patterns).forEach((headerName) => { - if (typeof patterns[headerName] !== 'function') { - promises.push( - asyncForEach(patterns[headerName], (pattern) => { - headerName = headerName.toLowerCase() - - if (headerName in headers) { - headers[headerName].forEach((headerValue) => { - if (pattern.regex.test(headerValue)) { - addDetected(app, pattern, 'headers', headerValue, headerName) - } - }) - } - }) - ) - } - }) - - return promises ? Promise.all(promises) : Promise.resolve() - } - - /** - * Analyze cookies - */ - analyzeCookies(app, cookies) { - const patterns = this.parsePatterns(app.props.cookies) - const promises = [] - - Object.keys(patterns).forEach((cookieName) => { - if (typeof patterns[cookieName] !== 'function') { - const cookieNameLower = cookieName.toLowerCase() - - promises.push( - asyncForEach(patterns[cookieName], (pattern) => { - const cookie = cookies.find( - (_cookie) => _cookie.name.toLowerCase() === cookieNameLower - ) - - if (cookie && pattern.regex.test(cookie.value)) { - addDetected(app, pattern, 'cookies', cookie.value, cookieName) - } - }) - ) - } - }) - - return promises ? Promise.all(promises) : Promise.resolve() - } - - /** - * Analyze JavaScript variables - */ - analyzeJs(app, results) { - const promises = [] - - Object.keys(results).forEach((string) => { - if (typeof results[string] !== 'function') { - promises.push( - asyncForEach(Object.keys(results[string]), (index) => { - const pattern = this.jsPatterns[app.name][string][index] - const value = results[string][index] - - if (pattern && pattern.regex.test(value)) { - addDetected(app, pattern, 'js', value, string) - } - }) - ) - } - }) - - return promises ? Promise.all(promises) : Promise.resolve() - } -} - -if (typeof module === 'object') { - module.exports = Wappalyzer -} diff --git a/src/drivers/webextension/manifest.json b/src/drivers/webextension/manifest.json index e034825ac..e00572b6b 100644 --- a/src/drivers/webextension/manifest.json +++ b/src/drivers/webextension/manifest.json @@ -36,7 +36,6 @@ "https://*/*" ], "js": [ - "node_modules/webextension-polyfill/dist/browser-polyfill.js", "js/content.js" ], "run_at": "document_idle" @@ -46,12 +45,7 @@ "http://*/*", "https://*/*" ], - "exclude_matches": [ - "https://*.modirum.com/*", - "https://www.alphaecommerce.gr/*" - ], "js": [ - "node_modules/webextension-polyfill/dist/browser-polyfill.js", "js/lib/iframe.js" ], "run_at": "document_start", diff --git a/src/wappalyzer.js b/src/wappalyzer.js index e69399e87..e72eb8af7 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -1,727 +1,331 @@ -const validation = { - hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/, - hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/ -} - -/** - * Enclose string in array - */ -function asArray(value) { - return Array.isArray(value) ? value : [value] -} - -/** - * - */ -function asyncForEach(iterable, iterator) { - return Promise.all( - (iterable || []).map( - (item) => - new Promise((resolve) => setTimeout(() => resolve(iterator(item)), 1)) - ) - ) -} - -/** - * Mark application as detected, set confidence and version - */ -function addDetected(app, pattern, type, value, key) { - app.detected = true - - // Set confidence level - app.confidence[`${type} ${key ? `${key} ` : ''}${pattern.regex}`] = - pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10) - - // Detect version number - if (pattern.version) { - const versions = [] - const matches = pattern.regex.exec(value) - - let { version } = pattern - - if (matches) { - matches.forEach((match, i) => { - // Parse ternary operator - const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version) - - if (ternary && ternary.length === 3) { - version = version.replace(ternary[0], match ? ternary[1] : ternary[2]) - } - - // Replace back references - version = version - .trim() - .replace(new RegExp(`\\\\${i}`, 'g'), match || '') - }) - - if (version && !versions.includes(version)) { - versions.push(version) - } +'use strict' + +const Wappalyzer = { + technologies: [], + categories: [], + + slugify(string) { + return string + .toLowerCase() + .replace(/[^a-z0-9-]/g, '-') + .replace(/--+/g, '-') + .replace(/(?:^-|-$)/, '') + }, + + getTechnology(name) { + return Wappalyzer.technologies.find(({ name: _name }) => name === _name) + }, + + getCategory(id) { + return Wappalyzer.categories.find(({ id: _id }) => id === _id) + }, + + resolve(detections = []) { + const resolved = detections.reduce((resolved, { technology }) => { + if ( + resolved.findIndex( + ({ technology: { name } }) => name === technology.name + ) === -1 + ) { + let version = '' + let confidence = 0 + + detections.forEach(({ technology: { name }, pattern, match }) => { + if (name === technology.name) { + const versionValue = Wappalyzer.resolveVersion(pattern, match) + + confidence = Math.min(100, confidence + pattern.confidence) + version = + versionValue.length > version.length && versionValue.length <= 10 + ? versionValue + : version + } + }) - if (versions.length) { - // Use the longest detected version number - app.version = versions.reduce((a, b) => (a.length > b.length ? a : b)) + resolved.push({ technology, confidence, version }) } - } - } -} - -function resolveExcludes(apps, detected) { - const excludes = [] - const detectedApps = Object.assign({}, apps, detected) - - // Exclude app in detected apps only - Object.keys(detectedApps).forEach((appName) => { - const app = detectedApps[appName] - if (app.props.excludes) { - asArray(app.props.excludes).forEach((excluded) => { - excludes.push(excluded) + return resolved + }, []) + + Wappalyzer.resolveExcludes(resolved) + Wappalyzer.resolveImplies(resolved) + + return resolved.map( + ({ + technology: { name, slug, categories, icon, website }, + confidence, + version + }) => ({ + name, + slug, + categories: categories.map((id) => Wappalyzer.getCategory(id)), + confidence, + version, + icon, + website }) - } - }) - - // Remove excluded applications - Object.keys(apps).forEach((appName) => { - if (excludes.includes(appName)) { - delete apps[appName] - } - }) -} - -class Application { - constructor(name, props, detected) { - this.confidence = {} - this.confidenceTotal = 0 - this.detected = Boolean(detected) - this.excludes = [] - this.name = name - this.props = props - this.version = '' - } - - /** - * Calculate confidence total - */ - getConfidence() { - let total = 0 - - Object.keys(this.confidence).forEach((id) => { - total += this.confidence[id] - }) - - this.confidenceTotal = Math.min(total, 100) - - return this.confidenceTotal - } -} - -class Wappalyzer { - constructor() { - this.apps = {} - this.categories = {} - this.driver = {} - this.jsPatterns = {} - this.detected = {} - this.hostnameCache = { - expires: Date.now() + 1000 * 60 * 60 * 24, - hostnames: {} - } - this.adCache = [] - - this.config = { - websiteURL: 'https://www.wappalyzer.com/', - twitterURL: 'https://twitter.com/Wappalyzer', - githubURL: 'https://github.com/AliasIO/Wappalyzer' - } - } - - /** - * Log messages to console - */ - log(message, source, type) { - if (this.driver.log) { - this.driver.log(message, source || '', type || 'debug') - } - } - - analyze(url, data, context) { - const apps = {} - const promises = [] - const startTime = new Date() - const { scripts, cookies, headers, js } = data + ) + }, - let { html } = data + resolveVersion({ version, regex }, match) { + let resolved = version - if (this.detected[url.canonical] === undefined) { - this.detected[url.canonical] = {} - } + if (version) { + const matches = regex.exec(match) - const metaTags = [] + if (matches) { + matches.forEach((match, index) => { + // Parse ternary operator + const ternary = new RegExp(`\\\\${index}\\?([^:]+):(.*)$`).exec( + version + ) - // Additional information - let language = null + if (ternary && ternary.length === 3) { + resolved = version.replace( + ternary[0], + match ? ternary[1] : ternary[2] + ) + } - if (html) { - if (typeof html !== 'string') { - html = '' + // Replace back references + resolved = resolved + .trim() + .replace(new RegExp(`\\\\${index}`, 'g'), match || '') + }) } - - let matches = data.html.match( - new RegExp(']*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i') - ) - - language = matches && matches.length ? matches[1] : data.language || null - - // Meta tags - const regex = /]+>/gi - - do { - matches = regex.exec(html) - - if (!matches) { - break - } - - metaTags.push(matches[0]) - } while (matches) } - Object.keys(this.apps).forEach((appName) => { - apps[appName] = - this.detected[url.canonical] && this.detected[url.canonical][appName] - ? this.detected[url.canonical][appName] - : new Application(appName, this.apps[appName]) + return resolved + }, - const app = apps[appName] + resolveExcludes(resolved) { + resolved.forEach(({ technology }) => { + technology.excludes.forEach((name) => { + const excluded = Wappalyzer.getTechnology(name) - promises.push(this.analyzeUrl(app, url)) - - if (html) { - promises.push(this.analyzeHtml(app, html)) - promises.push(this.analyzeMeta(app, metaTags)) - } - - if (scripts) { - promises.push(this.analyzeScripts(app, scripts)) - } - - if (cookies) { - promises.push(this.analyzeCookies(app, cookies)) - } - - if (headers) { - promises.push(this.analyzeHeaders(app, headers)) - } - }) - - if (js) { - Object.keys(js).forEach((appName) => { - if (typeof js[appName] !== 'function') { - promises.push(this.analyzeJs(apps[appName], js[appName])) + if (!excluded) { + throw new Error(`Excluded technology does not exist: ${name}`) } - }) - } - - return new Promise(async (resolve) => { - await Promise.all(promises) - Object.keys(apps).forEach((appName) => { - const app = apps[appName] + const index = resolved.findIndex(({ name }) => name === excluded.name) - if (!app.detected || !app.getConfidence()) { - delete apps[app.name] + if (index === -1) { + resolved.splice(index, 1) } }) - - resolveExcludes(apps, this.detected[url]) - this.resolveImplies(apps, url.canonical) - - this.cacheDetectedApps(apps, url.canonical) - this.trackDetectedApps(apps, url, language) - - this.log( - `Processing ${Object.keys(data).join(', ')} took ${( - (new Date() - startTime) / - 1000 - ).toFixed(2)}s (${url.hostname})`, - 'core' - ) - - if (Object.keys(apps).length) { - this.log( - `Identified ${Object.keys(apps).join(', ')} (${url.hostname})`, - 'core' - ) - } - - this.driver.displayApps( - this.detected[url.canonical], - { language }, - context - ) - - return resolve() }) - } + }, - /** - * Cache detected ads - */ - cacheDetectedAds(ad) { - this.adCache.push(ad) - } - - /** - * - */ - robotsTxtAllows(url) { - return new Promise(async (resolve, reject) => { - const parsed = this.parseUrl(url) + resolveImplies(resolved) { + let done = false - if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { - return reject() - } - - const robotsTxt = await this.driver.getRobotsTxt( - parsed.host, - parsed.protocol === 'https:' - ) - - if ( - robotsTxt.some( - (disallowedPath) => parsed.pathname.indexOf(disallowedPath) === 0 - ) - ) { - return reject() - } - - return resolve() - }) - } - - /** - * Parse a URL - */ - parseUrl(url) { - const a = this.driver.document.createElement('a') - - a.href = url - - a.canonical = `${a.protocol}//${a.host}${a.pathname}` - - return a - } - - /** - * - */ - static parseRobotsTxt(robotsTxt) { - const disallow = [] - - let userAgent - - robotsTxt.split('\n').forEach((line) => { - let matches = /^User-agent:\s*(.+)$/i.exec(line.trim()) - - if (matches) { - userAgent = matches[1].toLowerCase() - } else if (userAgent === '*' || userAgent === 'wappalyzer') { - matches = /^Disallow:\s*(.+)$/i.exec(line.trim()) - - if (matches) { - disallow.push(matches[1]) - } - } - }) - - return disallow - } - - /** - * - */ - ping() { - if ( - !this.hostnameCache.hostnames || - Object.keys(this.hostnameCache.hostnames).length > 50 || - this.hostnameCache.expires < Date.now() - ) { - this.driver.ping(this.hostnameCache) - - this.hostnameCache = { - expires: Date.now() + 1000 * 60 * 60 * 24, - hostnames: {} - } - } + while (resolved.length && !done) { + resolved.forEach(({ technology, confidence }) => { + done = true - if (this.adCache.length > 50) { - this.driver.ping(undefined, this.adCache) + technology.implies.forEach((name) => { + const implied = Wappalyzer.getTechnology(name) - this.adCache = [] - } - } - - /** - * Parse apps.json patterns - */ - parsePatterns(patterns) { - if (!patterns) { - return [] - } - - let parsed = {} - - // Convert string to object containing array containing string - if (typeof patterns === 'string' || Array.isArray(patterns)) { - patterns = { - main: asArray(patterns) - } - } - - Object.keys(patterns).forEach((key) => { - parsed[key] = [] - - asArray(patterns[key]).forEach((pattern) => { - const attrs = {} - - pattern.split('\\;').forEach((attr, i) => { - if (i) { - // Key value pairs - attr = attr.split(':') - - if (attr.length > 1) { - attrs[attr.shift()] = attr.join(':') - } - } else { - attrs.string = attr + if (!implied) { + throw new Error(`Implied technology does not exist: ${name}`) + } - try { - attrs.regex = new RegExp(attr.replace('/', '/'), 'i') // Escape slashes in regular expression - } catch (error) { - attrs.regex = new RegExp() + if ( + resolved.findIndex( + ({ technology: { name } }) => name === implied.name + ) === -1 + ) { + resolved.push({ technology: implied, confidence, version: '' }) - this.log(`${error.message}: ${attr}`, 'error', 'core') - } + done = false } }) - - parsed[key].push(attrs) }) - }) - - // Convert back to array if the original pattern list was an array (or string) - if ('main' in parsed) { - parsed = parsed.main } - - return parsed - } - - /** - * Parse JavaScript patterns - */ - parseJsPatterns() { - Object.keys(this.apps).forEach((appName) => { - if (this.apps[appName].js) { - this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js) - } - }) - } - - resolveImplies(apps, url) { - let checkImplies = true - - const resolve = (appName) => { - const app = apps[appName] - - if (app && app.props.implies) { - asArray(app.props.implies).forEach((implied) => { - ;[implied] = this.parsePatterns(implied) - - if (!this.apps[implied.string]) { - this.log( - `Implied application ${implied.string} does not exist`, - 'core', - 'warn' + }, + + async analyze(url, { html, meta, headers, cookies, scripts }) { + const oo = Wappalyzer.analyzeOneToOne + const om = Wappalyzer.analyzeOneToMany + const mm = Wappalyzer.analyzeManyToMany + + const flatten = (array) => Array.prototype.concat.apply([], array) + + try { + const detections = flatten( + flatten( + await Promise.all( + Wappalyzer.technologies.map((technology) => + Promise.all([ + oo(technology, 'url', url), + oo(technology, 'html', html), + om(technology, 'meta', meta), + mm(technology, 'headers', headers), + om(technology, 'cookies', cookies), + om(technology, 'scripts', scripts) + ]) ) + ) + ) + ).filter((technology) => technology) - return - } - - if (!(implied.string in apps)) { - apps[implied.string] = - this.detected[url] && this.detected[url][implied.string] - ? this.detected[url][implied.string] - : new Application( - implied.string, - this.apps[implied.string], - true - ) - - checkImplies = true - } - - // Apply app confidence to implied app - Object.keys(app.confidence).forEach((id) => { - apps[implied.string].confidence[`${id} implied by ${appName}`] = - app.confidence[id] * - (implied.confidence === undefined ? 1 : implied.confidence / 100) - }) - }) - } - } - - // Implied applications - // Run several passes as implied apps may imply other apps - while (checkImplies) { - checkImplies = false - - Object.keys(apps).forEach(resolve) + return detections + } catch (error) { + throw new Error(error.message || error.toString()) } - } + }, + + setTechnologies(data) { + const transform = Wappalyzer.transformPatterns + + Wappalyzer.technologies = Object.keys(data).reduce((technologies, name) => { + const { + cats, + url, + html, + meta, + headers, + cookies, + script, + js, + implies, + excludes, + icon, + website + } = data[name] + + technologies.push({ + name, + categories: cats || [], + slug: Wappalyzer.slugify(name), + url: transform(url), + headers: transform( + Object.keys(headers || {}).reduce( + (lcHeaders, header) => ({ + ...lcHeaders, + [header.toLowerCase()]: headers[header] + }), + {} + ) + ), + cookies: transform(cookies), + html: transform(html), + meta: transform(meta), + scripts: transform(script), + js: transform(js), + implies: typeof implies === 'string' ? [implies] : implies || [], + excludes: typeof excludes === 'string' ? [excludes] : excludes || [], + icon: icon || 'default.svg', + website: website || '' + }) - /** - * Cache detected applications - */ - cacheDetectedApps(apps, url) { - Object.keys(apps).forEach((appName) => { - const app = apps[appName] + return technologies + }, []) + }, - // Per URL - this.detected[url][appName] = app + setCategories(data) { + Wappalyzer.categories = Object.keys(data) + .reduce((categories, id) => { + const category = data[id] - Object.keys(app.confidence).forEach((id) => { - this.detected[url][appName].confidence[id] = app.confidence[id] - }) - }) + categories.push({ + id: parseInt(id, 10), + slug: Wappalyzer.slugify(category.name), + ...category + }) - if (this.driver.ping instanceof Function) { - this.ping() - } - } + return categories + }, []) + .sort(({ priority: a }, { priority: b }) => (a > b ? -1 : 0)) + }, - /** - * Track detected applications - */ - trackDetectedApps(apps, url, language) { - if (!(this.driver.ping instanceof Function)) { - return + transformPatterns(patterns) { + if (!patterns) { + return [] } - const hostname = `${url.protocol}//${url.hostname}` + const toArray = (value) => (Array.isArray(value) ? value : [value]) - Object.keys(apps).forEach((appName) => { - const app = apps[appName] - - if (this.detected[url.canonical][appName].getConfidence() >= 100) { - if ( - validation.hostname.test(url.hostname) && - !validation.hostnameBlacklist.test(url.hostname) - ) { - if (!(hostname in this.hostnameCache.hostnames)) { - this.hostnameCache.hostnames[hostname] = { - applications: {}, - meta: {} - } - } + if (typeof patterns === 'string' || Array.isArray(patterns)) { + patterns = { main: patterns } + } - if ( - !(appName in this.hostnameCache.hostnames[hostname].applications) - ) { - this.hostnameCache.hostnames[hostname].applications[appName] = { - hits: 0 + const parsed = Object.keys(patterns).reduce((parsed, key) => { + parsed[key] = toArray(patterns[key]).map((pattern) => { + const { regex, confidence, version } = pattern + .split('\\;') + .reduce((attrs, attr, i) => { + if (i) { + // Key value pairs + attr = attr.split(':') + + if (attr.length > 1) { + attrs[attr.shift()] = attr.join(':') + } + } else { + // Escape slashes in regular expression + attrs.regex = new RegExp(attr.replace(/\//g, '\\/'), 'i') } - } - this.hostnameCache.hostnames[hostname].applications[appName].hits += 1 + return attrs + }, {}) - if (apps[appName].version) { - this.hostnameCache.hostnames[hostname].applications[ - appName - ].version = app.version - } + return { + regex, + confidence: parseInt(confidence || 100, 10), + version: version || '' } - } - }) - - if (hostname in this.hostnameCache.hostnames) { - this.hostnameCache.hostnames[hostname].meta.language = language - } - - this.ping() - } - - /** - * Analyze URL - */ - analyzeUrl(app, url) { - const patterns = this.parsePatterns(app.props.url) - - if (!patterns.length) { - return Promise.resolve() - } - - return asyncForEach(patterns, (pattern) => { - if (pattern.regex.test(url.canonical)) { - addDetected(app, pattern, 'url', url.canonical) - } - }) - } + }) - /** - * Analyze HTML - */ - analyzeHtml(app, html) { - const patterns = this.parsePatterns(app.props.html) + return parsed + }, {}) - if (!patterns.length) { - return Promise.resolve() - } + return 'main' in parsed ? parsed.main : parsed + }, - return asyncForEach(patterns, (pattern) => { - if (pattern.regex.test(html)) { - addDetected(app, pattern, 'html', html) + analyzeOneToOne(technology, type, value) { + return technology[type].reduce((technologies, pattern) => { + if (pattern.regex.test(value)) { + technologies.push({ technology, pattern, match: value }) } - }) - } - /** - * Analyze script tag - */ - analyzeScripts(app, scripts) { - const patterns = this.parsePatterns(app.props.script) + return technologies + }, []) + }, - if (!patterns.length) { - return Promise.resolve() - } + analyzeOneToMany(technology, type, items = []) { + return items.reduce((technologies, { key, value }) => { + const patterns = technology[type][key] || [] - return asyncForEach(patterns, (pattern) => { - scripts.forEach((uri) => { - if (pattern.regex.test(uri)) { - addDetected(app, pattern, 'script', uri) + patterns.forEach((pattern) => { + if (pattern.regex.test(value)) { + technologies.push({ technology, pattern, match: value }) } }) - }) - } - /** - * Analyze meta tag - */ - analyzeMeta(app, metaTags) { - const patterns = this.parsePatterns(app.props.meta) - const promises = [] + return technologies + }, []) + }, - if (!app.props.meta) { - return Promise.resolve() - } + analyzeManyToMany(technology, type, items = {}) { + return Object.keys(technology[type]).reduce((technologies, key) => { + const patterns = technology[type][key] || [] + const values = items[key] || [] - metaTags.forEach((match) => { - Object.keys(patterns).forEach((meta) => { - const r = new RegExp(`(?:name|property)=["']${meta}["']`, 'i') - - if (r.test(match)) { - const content = match.match(/content=("|')([^"']+)("|')/i) - - promises.push( - asyncForEach(patterns[meta], (pattern) => { - if ( - content && - content.length === 4 && - pattern.regex.test(content[2]) - ) { - addDetected(app, pattern, 'meta', content[2], meta) - } - }) - ) - } + patterns.forEach((pattern) => { + values.forEach((value) => { + if (pattern.regex.test(value)) { + technologies.push({ technology, pattern, match: value }) + } + }) }) - }) - - return Promise.all(promises) - } - - /** - * Analyze response headers - */ - analyzeHeaders(app, headers) { - const patterns = this.parsePatterns(app.props.headers) - const promises = [] - - Object.keys(patterns).forEach((headerName) => { - if (typeof patterns[headerName] !== 'function') { - promises.push( - asyncForEach(patterns[headerName], (pattern) => { - headerName = headerName.toLowerCase() - - if (headerName in headers) { - headers[headerName].forEach((headerValue) => { - if (pattern.regex.test(headerValue)) { - addDetected(app, pattern, 'headers', headerValue, headerName) - } - }) - } - }) - ) - } - }) - - return promises ? Promise.all(promises) : Promise.resolve() - } - - /** - * Analyze cookies - */ - analyzeCookies(app, cookies) { - const patterns = this.parsePatterns(app.props.cookies) - const promises = [] - - Object.keys(patterns).forEach((cookieName) => { - if (typeof patterns[cookieName] !== 'function') { - const cookieNameLower = cookieName.toLowerCase() - - promises.push( - asyncForEach(patterns[cookieName], (pattern) => { - const cookie = cookies.find( - (_cookie) => _cookie.name.toLowerCase() === cookieNameLower - ) - - if (cookie && pattern.regex.test(cookie.value)) { - addDetected(app, pattern, 'cookies', cookie.value, cookieName) - } - }) - ) - } - }) - - return promises ? Promise.all(promises) : Promise.resolve() - } - - /** - * Analyze JavaScript variables - */ - analyzeJs(app, results) { - const promises = [] - - Object.keys(results).forEach((string) => { - if (typeof results[string] !== 'function') { - promises.push( - asyncForEach(Object.keys(results[string]), (index) => { - const pattern = this.jsPatterns[app.name][string][index] - const value = results[string][index] - - if (pattern && pattern.regex.test(value)) { - addDetected(app, pattern, 'js', value, string) - } - }) - ) - } - }) - return promises ? Promise.all(promises) : Promise.resolve() + return technologies + }, []) } } -if (typeof module === 'object') { +if (typeof module !== 'undefined') { module.exports = Wappalyzer }