From c9b0ab7aa2db7ca720014a5afb096334febebcb0 Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Sun, 18 Mar 2018 14:34:20 +1100 Subject: [PATCH] Fix header detection, performance improvements in WebExtension driver --- src/drivers/webextension/js/content.js | 14 ++++- src/drivers/webextension/js/driver.js | 80 +++++++++++--------------- src/drivers/webextension/js/inject.js | 22 +++---- src/wappalyzer.js | 28 +++++---- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/src/drivers/webextension/js/content.js b/src/drivers/webextension/js/content.js index e2c1a208d..061830705 100644 --- a/src/drivers/webextension/js/content.js +++ b/src/drivers/webextension/js/content.js @@ -26,15 +26,19 @@ if ( typeof browser !== 'undefined' && typeof document.body !== 'undefined' ) { const script = document.createElement('script'); script.onload = () => { - addEventListener('message', event => { + const onMessage = event => { if ( event.data.id !== 'js' ) { return; } - document.body.removeChild(script); + removeEventListener('message', onMessage); sendMessage('analyze', { js: event.data.js }); - }, true); + + script.remove(); + }; + + addEventListener('message', onMessage); sendMessage('get_js_patterns', {}, response => { if ( response ) { @@ -61,3 +65,7 @@ function sendMessage(id, subject, callback) { source: 'content.js' }, callback || ( () => {} )); } + +// https://stackoverflow.com/a/44774834 +// https://developer.mozilla.org/en-US/Add-ons/WebExtensions/API/tabs/executeScript#Return_value +undefined; diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index d05d81534..bbe784d10 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -8,7 +8,6 @@ const wappalyzer = new Wappalyzer(); var tabCache = {}; -var headersCache = {}; var categoryOrder = []; var options = {}; var robotsTxtQueue = {}; @@ -118,48 +117,43 @@ getOption('version') 
getOption('dynamicIcon', true); getOption('pinnedCategory'); -// Run content script -var callback = tabs => { - tabs.forEach(tab => { - if ( tab.url.match(/^https?:\/\//) ) { +// Run content script on all tabs +browser.tabs.query({ url: [ 'http://*/*', 'https://*/*' ] }) + .then(tabs => { + tabs.forEach(tab => { browser.tabs.executeScript(tab.id, { - file: 'js/content.js' + file: '../js/content.js' }); - } + }) }) -}; - -browser.tabs.query({}) - .then(callback) .catch(error => wappalyzer.log(error, 'driver', 'error')); // Capture response headers browser.webRequest.onCompleted.addListener(request => { - var responseHeaders = {}; + const headers = {}; if ( request.responseHeaders ) { - var url = wappalyzer.parseUrl(request.url); + const url = wappalyzer.parseUrl(request.url); - request.responseHeaders.forEach(function(header) { - if ( !responseHeaders[header.name.toLowerCase()] ) { - responseHeaders[header.name.toLowerCase()] = [] - } - responseHeaders[header.name.toLowerCase()].push(header.value || '' + header.binaryValue); - }); + browser.tabs.query({ url: [ url.canonical ] }) + .then(tabs => { + const tab = tabs[0] || null; - if ( headersCache.length > 50 ) { - headersCache = {}; - } + if ( tab ) { + request.responseHeaders.forEach(header => { + const name = header.name.toLowerCase(); - if ( /text\/html/.test(responseHeaders['content-type'][0]) ) { - if ( headersCache[url.canonical] === undefined ) { - headersCache[url.canonical] = {}; - } + headers[name] = headers[name] || []; - Object.keys(responseHeaders).forEach(header => { - headersCache[url.canonical][header] = responseHeaders[header].slice(); - }); - } + headers[name].push(header.value || header.binaryValue.toString()); + }); + + if ( headers['content-type'] && /\/x?html/.test(headers['content-type'][0]) ) { + wappalyzer.analyze(url, { headers }, { tab }); + } + } + }) + .catch(error => wappalyzer.log(error, 'driver', 'error')); } }, { urls: [ 'http://*/*', 'https://*/*' ], types: [ 'main_frame' ] }, [ 
'responseHeaders' ]); @@ -167,7 +161,7 @@ browser.webRequest.onCompleted.addListener(request => { ( chrome || browser ).runtime.onMessage.addListener((message, sender, sendResponse) => { if ( typeof message.id != 'undefined' ) { if ( message.id !== 'log' ) { - wappalyzer.log('Message received' + ( message.source ? ' from ' + message.source : '' ) + ': ' + message.id, 'driver'); + wappalyzer.log('Message' + ( message.source ? ' from ' + message.source : '' ) + ': ' + message.id, 'driver'); } var url = wappalyzer.parseUrl(sender.tab ? sender.tab.url : ''); @@ -175,24 +169,16 @@ browser.webRequest.onCompleted.addListener(request => { switch ( message.id ) { case 'log': - wappalyzer.log(message.message, message.source); + wappalyzer.log(message.subject, message.source); break; case 'init': browser.cookies.getAll({ domain: '.' + url.hostname }) - .then(cookies => wappalyzer.analyze(url, { cookies }, { - tab: sender.tab - })); + .then(cookies => wappalyzer.analyze(url, { cookies }, { tab: sender.tab })); break; case 'analyze': - if ( headersCache[url.canonical] !== undefined ) { - message.subject.headers = headersCache[url.canonical]; - } - - wappalyzer.analyze(url, message.subject, { - tab: sender.tab - }); + wappalyzer.analyze(url, message.subject, { tab: sender.tab }); break; case 'ad_log': @@ -242,7 +228,13 @@ wappalyzer.driver.log = (message, source, type) => { wappalyzer.driver.displayApps = (detected, meta, context) => { var tab = context.tab; - tabCache[tab.id] = tabCache[tab.id] || { detected: [] }; + if ( tab === undefined ) { + return; + } + + tabCache[tab.id] = tabCache[tab.id] || { + detected: [] + }; tabCache[tab.id].detected = detected; @@ -295,8 +287,6 @@ wappalyzer.driver.displayApps = (detected, meta, context) => { */ wappalyzer.driver.getRobotsTxt = (host, secure = false) => { if ( robotsTxtQueue.hasOwnProperty(host) ) { - wappalyzer.log('robotTxt fetch already in queue'); - return robotsTxtQueue[host]; } diff --git 
a/src/drivers/webextension/js/inject.js b/src/drivers/webextension/js/inject.js index 55ac2be4d..f860e7646 100644 --- a/src/drivers/webextension/js/inject.js +++ b/src/drivers/webextension/js/inject.js @@ -1,10 +1,14 @@ -(function() { +(() => { try { - addEventListener('message', (event => { + addEventListener('message', onMessage); + + function onMessage(event) { if ( event.data.id !== 'patterns' ) { return; } + removeEventListener('message', onMessage); + const patterns = event.data.patterns || {}; const js = {}; @@ -30,13 +34,9 @@ } postMessage({ id: 'js', js }, '*'); - }), false); - } catch(e) { - // Fail quietly - } + } - function detectJs(chain) { - try { + function detectJs(chain) { const properties = chain.split('.'); var value = properties.length ? window : null; @@ -54,8 +54,8 @@ } return typeof value === 'string' || typeof value === 'number' ? value : !!value; - } catch(e) { - // Fail quietly } + } catch(e) { + // Fail quietly } -}()); +})(); diff --git a/src/wappalyzer.js b/src/wappalyzer.js index d289fd2fd..cf91b953d 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -42,22 +42,28 @@ class Wappalyzer { } analyze(url, data, context) { + const startTime = new Date(); + const promises = []; var apps = {}; - if ( typeof data.html !== 'string' ) { - data.html = ''; - } - if ( this.detected[url.canonical] === undefined ) { this.detected[url.canonical] = {}; } // Additional information - const matches = data.html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i); + var language = null; + + if ( data.html ) { + if ( typeof data.html !== 'string' ) { + data.html = ''; + } + + const matches = data.html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i); - const language = matches && matches.length ? matches[1] : null; + language = matches && matches.length ? matches[1] : null; + } Object.keys(this.apps).forEach(appName => { apps[appName] = this.detected[url.canonical] && this.detected[url.canonical][appName] ? 
this.detected[url.canonical][appName] : new Application(appName, this.apps[appName]); @@ -86,7 +92,7 @@ class Wappalyzer { if ( data.env ) { promises.push(this.analyzeEnv(app, data.env)); } - }) + }); if ( data.js ) { Object.keys(data.js).forEach(appName => { @@ -111,8 +117,10 @@ class Wappalyzer { this.cacheDetectedApps(apps, url.canonical); this.trackDetectedApps(apps, url, language); + this.log('Processing ' + Object.keys(data).join(', ') + ' took ' + (( new Date() - startTime ) / 1000).toFixed(2) + 's (' + url.hostname + ')', 'core'); + if ( Object.keys(apps).length ) { - this.log(Object.keys(apps).length + ' apps detected: ' + Object.keys(apps).join(', ') + ' on ' + url.canonical, 'core'); + this.log('Identified ' + Object.keys(apps).join(', ') + ' (' + url.hostname + ')', 'core'); } this.driver.displayApps(this.detected[url.canonical], { language }, context); @@ -492,9 +500,9 @@ class Wappalyzer { const promises = []; Object.keys(patterns).forEach(headerName => { - headerName = headerName.toLowerCase(); - promises.push(this.asyncForEach(patterns[headerName], pattern => { + headerName = headerName.toLowerCase(); + if ( headerName in headers ) { headers[headerName].forEach(headerValue => { if ( pattern.regex.test(headerValue) ) {