From 2aca8275c5b73673f3cd0dc70fa8bcdd98050520 Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Wed, 3 Jun 2020 15:43:28 +1000 Subject: [PATCH 1/7] Refactoring --- src/drivers/webextension/html/background.html | 5 +- src/drivers/webextension/js/content.js | 47 +-- src/drivers/webextension/js/driver.js | 8 +- src/drivers/webextension/js/driver2.js | 285 +++++++++++++++ src/drivers/webextension/js/wappalyzer2.js | 326 ++++++++++++++++++ src/wappalyzer.js | 8 - src/wappalyzer.spec.js | 289 ++++++++-------- 7 files changed, 785 insertions(+), 183 deletions(-) create mode 100644 src/drivers/webextension/js/driver2.js create mode 100644 src/drivers/webextension/js/wappalyzer2.js diff --git a/src/drivers/webextension/html/background.html b/src/drivers/webextension/html/background.html index 8a03c7a9d..f6147dc56 100644 --- a/src/drivers/webextension/html/background.html +++ b/src/drivers/webextension/html/background.html @@ -4,9 +4,8 @@
- - - + + diff --git a/src/drivers/webextension/js/content.js b/src/drivers/webextension/js/content.js index 66d620f4a..3be7fa391 100644 --- a/src/drivers/webextension/js/content.js +++ b/src/drivers/webextension/js/content.js @@ -1,20 +1,14 @@ -/** global: browser */ -/** global: XMLSerializer */ - -/* global browser */ +'use strict' /* eslint-env browser */ +/* globals chrome */ -const port = browser.runtime.connect({ - name: 'content.js' -}) +const port = chrome.runtime.connect({ name: 'content.js' }) ;(async function() { - if (typeof browser !== 'undefined' && typeof document.body !== 'undefined') { + if (typeof chrome !== 'undefined' && typeof document.body !== 'undefined') { await new Promise((resolve) => setTimeout(resolve, 1000)) try { - port.postMessage({ id: 'init' }) - // HTML let html = new XMLSerializer().serializeToString(document) @@ -23,9 +17,7 @@ const port = browser.runtime.connect({ const maxRows = 3000 const rows = html.length / maxCols - let i - - for (i = 0; i < rows; i += 1) { + for (let i = 0; i < rows; i += 1) { if (i < maxRows / 2 || i > rows - maxRows / 2) { chunks.push(html.slice(i * maxCols, (i + 1) * maxCols)) } @@ -34,13 +26,23 @@ const port = browser.runtime.connect({ html = chunks.join('\n') // Scripts - const scripts = Array.prototype.slice - .apply(document.scripts) - .filter((script) => script.src) - .map((script) => script.src) + const scripts = Array.from(document.scripts) + .filter(({ src }) => src) + .map(({ src }) => src) .filter((script) => script.indexOf('data:text/javascript;') !== 0) - port.postMessage({ id: 'analyze', subject: { html, scripts } }) + // Meta + const meta = Array.from(document.querySelectorAll('meta')) + .map((meta) => ({ + key: meta.getAttribute('name') || meta.getAttribute('property'), + value: meta.getAttribute('content') + })) + .filter(({ value }) => value) + + port.postMessage({ + func: 'onContentLoad', + args: [location.href, { html, scripts, meta }] + }) // JavaScript variables const script = document.createElement('script') @@ -53,7 +55,10 @@ const port = browser.runtime.connect({ window.removeEventListener('message', onMessage) - port.postMessage({ id: 'analyze', subject: { js: event.data.js } }) + port.postMessage({ + func: 'analyze', + args: [new URL(location.href), { js: event.data.js }] + }) script.remove() } @@ -63,11 +68,11 @@ const port = browser.runtime.connect({ port.postMessage({ id: 'get_js_patterns' }) } - script.setAttribute('src', browser.extension.getURL('js/inject.js')) + script.setAttribute('src', chrome.extension.getURL('js/inject.js')) document.body.appendChild(script) } catch (error) { - port.postMessage({ id: 'log', subject: error }) + port.postMessage({ func: 'error', args: [error, 'content.js'] }) } } })() diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index 9742f8906..0ff7d491c 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -410,10 +410,10 @@ wappalyzer.driver.ping = async ( url: `${wappalyzer.config.websiteURL}installed` }) } else if (version !== previousVersion && upgradeMessage) { - // openTab({ - // url: `${wappalyzer.config.websiteURL}upgraded?v${version}`, - // background: true - // }) + openTab({ + url: `${wappalyzer.config.websiteURL}upgraded?v${version}`, + background: true + }) } await setOption('version', version) diff --git a/src/drivers/webextension/js/driver2.js b/src/drivers/webextension/js/driver2.js new file mode 100644 index 000000000..28883ee66 --- /dev/null +++ b/src/drivers/webextension/js/driver2.js @@ -0,0 +1,285 @@ +'use strict' +/* eslint-env browser */ +/* globals chrome, Wappalyzer */ + +const { setTechnologies, setCategories, analyze, resolve, unique } = Wappalyzer + +function promisify(context, method, ...args) { + return new Promise((resolve, reject) => { + context[method](...args, (...args) => { + if (chrome.runtime.lastError) { + return reject(chrome.runtime.lastError) + } + + resolve(...args) + }) + }) +} + +const Driver = { + cache: { + hostnames: {}, + robots: {} + }, + + agent: chrome.extension.getURL('/').startsWith('moz-') ? 'firefox' : 'chrome', + + log(message, source = 'driver', type = 'log') { + // eslint-disable-next-line no-console + console[type](`wappalyzer | ${source} |`, message) + }, + + warn(message, source = 'driver') { + Driver.log(message, source, 'warn') + }, + + error(error, source = 'driver') { + Driver.log(error, source, 'error') + }, + + open(url, active = true) { + chrome.tabs.create({ url, active }) + }, + + async loadTechnologies() { + try { + const { apps: technologies, categories } = await ( + await fetch(chrome.extension.getURL('apps.json')) + ).json() + + setTechnologies(technologies) + setCategories(categories) + } catch (error) { + Driver.error(error) + } + }, + + post(url, body) { + try { + return fetch(url, { + method: 'POST', + body: JSON.stringify(body) + }) + } catch (error) { + throw new Error(error.message || error.toString()) + } + }, + + async getOption(name, defaultValue = null) { + try { + const option = await promisify(chrome.storage.local, 'get', name) + + if (option[name] !== undefined) { + return option[name] + } + + return defaultValue + } catch (error) { + throw new Error(error.message || error.toString()) + } + }, + + async setOption(name, value) { + try { + await promisify(chrome.storage.local, 'set', { + [name]: value + }) + } catch (error) { + throw new Error(error.message || error.toString()) + } + }, + + onRuntimeConnect(port) { + port.onMessage.addListener(async (message) => { + const { func, args } = message + + if (!func || !port.sender.tab) { + return + } + + Driver.log(`Message received from ${port.name}: ${func}`) + + await Driver[func](...args) + + /* + const pinnedCategory = await getOption('pinnedCategory') + + const url = new URL(port.sender.tab.url) + + const cookies = await browser.cookies.getAll({ + domain: `.${url.hostname}` + }) + + let response + + switch (message.id) { + case 'log': + wappalyzer.log(message.subject, message.source) + + break + case 'analyze': + if (message.subject.html) { + browser.i18n + .detectLanguage(message.subject.html) + .then(({ languages }) => { + const language = languages + .filter(({ percentage }) => percentage >= 75) + .map(({ language: lang }) => lang)[0] + + message.subject.language = language + + wappalyzer.analyze(url, message.subject, { + tab: port.sender.tab + }) + }) + } else { + wappalyzer.analyze(url, message.subject, { tab: port.sender.tab }) + } + + await setOption('hostnameCache', wappalyzer.hostnameCache) + + break + case 'ad_log': + wappalyzer.cacheDetectedAds(message.subject) + + break + case 'get_apps': + response = { + tabCache: tabCache[message.tab.id], + apps: wappalyzer.apps, + categories: wappalyzer.categories, + pinnedCategory, + termsAccepted: + userAgent() === 'chrome' || + (await getOption('termsAccepted', false)) + } + + break + case 'set_option': + await setOption(message.key, message.value) + + break + case 'get_js_patterns': + response = { + patterns: wappalyzer.jsPatterns + } + + break + case 'update_theme_mode': + // Sync theme mode to popup. + response = { + themeMode: await getOption('themeMode', false) + } + + break + default: + // Do nothing + } + + if (response) { + port.postMessage({ + id: message.id, + response + }) + } + }) + */ + }) + }, + + async onWebRequestComplete(request) { + if (request.responseHeaders) { + const headers = {} + + try { + const url = new URL(request.url) + + const [tab] = await promisify(chrome.tabs, 'query', { url: [url.href] }) + + if (tab) { + request.responseHeaders.forEach((header) => { + const name = header.name.toLowerCase() + + headers[name] = headers[name] || [] + + headers[name].push( + (header.value || header.binaryValue || '').toString() + ) + }) + + if ( + headers['content-type'] && + /\/x?html/.test(headers['content-type'][0]) + ) { + await Driver.onDetect(url, await analyze(url, { headers }, { tab })) + } + } + } catch (error) { + Driver.error(error) + } + } + }, + + async onContentLoad(href, items) { + try { + const url = new URL(href) + + items.cookies = await promisify(chrome.cookies, 'getAll', { + domain: `.${url.hostname}` + }) + + await Driver.onDetect(url, await analyze(url, items)) + } catch (error) { + Driver.error(error) + } + }, + + async onDetect(url, detections = []) { + Driver.cache.hostnames[url.hostname] = unique([ + ...(Driver.cache.hostnames[url.hostname] || []), + ...detections + ]) + + const resolved = resolve(Driver.cache.hostnames[url.hostname]) + + const pinnedCategory = parseInt( + await Driver.getOption('pinnedCategory'), + 10 + ) + + const pinned = resolved.find(({ categories }) => + categories.some(({ id }) => id === pinnedCategory) + ) + + const { icon } = + pinned || + resolved.sort(({ categories: a }, { categories: b }) => { + const max = (value) => + value.reduce((max, { priority }) => Math.max(max, priority)) + + return max(a) > max(b) ? -1 : 1 + })[0] + + const tabs = await promisify(chrome.tabs, 'query', { url: [url.href] }) + + await Promise.all( + tabs.map(({ id: tabId }) => + promisify(chrome.pageAction, 'setIcon', { + tabId, + path: chrome.extension.getURL(`../images/icons/${icon}`) + }) + ) + ) + } +} + +;(async function() { + await Driver.loadTechnologies() + + chrome.runtime.onConnect.addListener(Driver.onRuntimeConnect) + chrome.webRequest.onCompleted.addListener( + Driver.onWebRequestComplete, + { urls: ['http://*/*', 'https://*/*'], types: ['main_frame'] }, + ['responseHeaders'] + ) +})() diff --git a/src/drivers/webextension/js/wappalyzer2.js b/src/drivers/webextension/js/wappalyzer2.js new file mode 100644 index 000000000..ee8049a80 --- /dev/null +++ b/src/drivers/webextension/js/wappalyzer2.js @@ -0,0 +1,326 @@ +'use strict' + +const Wappalyzer = { + technologies: [], + categories: [], + + slugify(string) { + return string + .toLowerCase() + .replace(/[^a-z0-9-]/g, '-') + .replace(/--+/g, '-') + .replace(/(?:^-|-$)/, '') + }, + + unique(detections) { + return detections.filter( + ({ technology: { name }, pattern: { regex } }, index) => { + return ( + detections.findIndex( + ({ technology: { name: _name }, pattern: { regex: _regex } }) => + name === _name && (!regex || regex === _regex) + ) === index + ) + } + ) + }, + + getTechnology(name) { + return Wappalyzer.technologies.find(({ name: _name }) => name === _name) + }, + + getCategory(id) { + return Wappalyzer.categories.find(({ id: _id }) => id === _id) + }, + + resolve(detections) { + const resolved = detections.reduce((resolved, { technology }) => { + if ( + resolved.findIndex( + ({ technology: { name } }) => name === technology.name + ) === -1 + ) { + let version = '' + let confidence = 0 + + detections.forEach(({ technology: { name }, pattern, match }) => { + if (name === technology.name) { + const versionValue = Wappalyzer.resolveVersion(pattern, match) + + confidence = Math.min(100, confidence + pattern.confidence) + version = + versionValue.length > version.length && versionValue.length <= 10 + ? versionValue + : version + } + }) + + resolved.push({ technology, confidence, version }) + } + + return resolved + }, []) + + Wappalyzer.resolveExcludes(resolved) + Wappalyzer.resolveImplies(resolved) + + return resolved.map( + ({ + technology: { name, slug, categories, icon, website }, + confidence, + version + }) => ({ + name, + slug, + categories: categories.map((id) => Wappalyzer.getCategory(id)), + confidence, + version, + icon, + website + }) + ) + }, + + resolveVersion({ version, regex }, match) { + let resolved = version + + if (version) { + const matches = regex.exec(match) + + if (matches) { + matches.forEach((match, index) => { + // Parse ternary operator + const ternary = new RegExp(`\\\\${index}\\?([^:]+):(.*)$`).exec( + version + ) + + if (ternary && ternary.length === 3) { + resolved = version.replace( + ternary[0], + match ? ternary[1] : ternary[2] + ) + } + + // Replace back references + resolved = resolved + .trim() + .replace(new RegExp(`\\\\${index}`, 'g'), match || '') + }) + } + } + + return resolved + }, + + resolveExcludes(resolved) { + resolved.forEach(({ technology }) => { + technology.excludes.forEach((name) => { + const excluded = Wappalyzer.getTechnology(name) + + const index = resolved.findIndex(({ name }) => name === excluded.name) + + if (index === -1) { + resolved.splice(index, 1) + } + }) + }) + }, + + resolveImplies(resolved) { + let done = false + + while (!done) { + resolved.forEach(({ technology, confidence }) => { + done = true + + technology.implies.forEach((name) => { + const implied = Wappalyzer.getTechnology(name) + + if ( + resolved.findIndex( + ({ technology: { name } }) => name === implied.name + ) === -1 + ) { + resolved.push({ technology: implied, confidence, version: '' }) + + done = false + } + }) + }) + } + }, + + async analyze(url, { html, meta, headers, cookies, scripts, js }) { + const oo = Wappalyzer.analyzeOneToOne + const om = Wappalyzer.analyzeOneToMany + const mm = Wappalyzer.analyzeManyToMany + + const flatten = (array) => Array.prototype.concat.apply([], array) + + try { + const detections = flatten( + flatten( + await Promise.all( + Wappalyzer.technologies.map((technology) => + Promise.all([ + oo(technology, 'url', url), + oo(technology, 'html', html), + om(technology, 'meta', meta), + mm(technology, 'headers', headers), + om(technology, 'cookies', cookies), + om(technology, 'scripts', scripts) + ]) + ) + ) + ) + ).filter((technology) => technology) + + return detections + } catch (error) { + throw new Error(error.message || error.toString()) + } + }, + + setTechnologies(data) { + const transform = Wappalyzer.transformPatterns + + Wappalyzer.technologies = Object.keys(data).reduce((technologies, name) => { + const { + cats, + url, + html, + meta, + headers, + cookies, + script, + implies, + excludes, + icon, + website + } = data[name] + + technologies.push({ + name, + categories: cats || [], + slug: Wappalyzer.slugify(name), + url: transform(url), + headers: transform(headers), + cookies: transform(cookies), + html: transform(html), + meta: transform(meta), + scripts: transform(script), + implies: typeof implies === 'string' ? [implies] : implies || [], + excludes: typeof excludes === 'string' ? [excludes] : excludes || [], + icon: icon || 'default.svg', + website: website || '' + }) + + return technologies + }, []) + }, + + setCategories(data) { + Wappalyzer.categories = Object.keys(data) + .reduce((categories, id) => { + const category = data[id] + + categories.push({ + id: parseInt(id, 10), + slug: Wappalyzer.slugify(category.name), + ...category + }) + + return categories + }, []) + .sort(({ priority: a }, { priority: b }) => (a > b ? -1 : 0)) + }, + + transformPatterns(patterns) { + if (!patterns) { + return [] + } + + const toArray = (value) => (Array.isArray(value) ? value : [value]) + + if (typeof patterns === 'string' || Array.isArray(patterns)) { + patterns = { main: patterns } + } + + const parsed = Object.keys(patterns).reduce((parsed, key) => { + parsed[key.toLowerCase()] = toArray(patterns[key]).map((pattern) => { + const { regex, confidence, version } = pattern + .split('\\;') + .reduce((attrs, attr, i) => { + if (i) { + // Key value pairs + attr = attr.split(':') + + if (attr.length > 1) { + attrs[attr.shift()] = attr.join(':') + } + } else { + // Escape slashes in regular expression + attrs.regex = new RegExp(attr.replace(/\//g, '\\/'), 'i') + } + + return attrs + }, {}) + + return { + regex, + confidence: parseInt(confidence || 100, 10), + version: version || '' + } + }) + + return parsed + }, {}) + + return 'main' in parsed ? parsed.main : parsed + }, + + analyzeOneToOne(technology, type, value) { + return technology[type].reduce((technologies, pattern) => { + if (pattern.regex.test(value)) { + technologies.push({ technology, pattern, match: value }) + } + + return technologies + }, []) + }, + + analyzeOneToMany(technology, type, items = []) { + return items.reduce((technologies, { key, value }) => { + const patterns = technology[type][key] || [] + + patterns.forEach((pattern) => { + if (pattern.regex.test(value)) { + technologies.push({ technology, pattern, match: value }) + } + }) + + return technologies + }, []) + }, + + analyzeManyToMany(technology, type, items = {}) { + return Object.keys(technology[type]).reduce((technologies, key) => { + const patterns = technology[type][key] || [] + const values = items[key] || [] + + patterns.forEach((pattern) => { + values.forEach((value) => { + if (pattern.regex.test(value)) { + technologies.push({ technology, pattern, match: value }) + } + }) + }) + + return technologies + }, []) + } +} + +if (typeof module !== 'undefined') { + module.exports = Wappalyzer +} diff --git a/src/wappalyzer.js b/src/wappalyzer.js index 043975167..e69399e87 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -1,11 +1,3 @@ -/** - * Wappalyzer v5 - * - * Created by Elbert Alias