diff --git a/src/drivers/npm/browser.js b/src/drivers/npm/browser.js deleted file mode 100644 index 532a75aa3..000000000 --- a/src/drivers/npm/browser.js +++ /dev/null @@ -1,20 +0,0 @@ -class Browser { - constructor(options) { - this.options = options; - - this.window = null; - this.document = null; - this.statusCode = null; - this.contentType = null; - this.headers = null; - this.statusCode = null; - this.contentType = null; - this.html = null; - this.js = null; - this.links = null; - this.scripts = null; - this.cookies = null; - } -} - -module.exports = Browser; diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index e8629382d..266b63b05 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -2,13 +2,15 @@ const { URL } = require('url') const fs = require('fs') const path = require('path') const LanguageDetect = require('languagedetect') +const Wappalyzer = require('./wappalyzer') + const { setTechnologies, setCategories, analyze, analyzeManyToMany, resolve -} = require('./wappalyzer') +} = Wappalyzer const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env @@ -90,54 +92,17 @@ function getJs() { return dereference(window) } -function processJs(window, patterns) { - const js = {} - - Object.keys(patterns).forEach((appName) => { - js[appName] = {} - - Object.keys(patterns[appName]).forEach((chain) => { - js[appName][chain] = {} - - patterns[appName][chain].forEach((pattern, index) => { - const properties = chain.split('.') - - let value = properties.reduce( - (parent, property) => - parent && parent[property] ? parent[property] : null, - window - ) - - value = - typeof value === 'string' || typeof value === 'number' - ? value - : !!value - - if (value) { - js[appName][chain][index] = value - } - }) - }) - }) - - return js -} - -function processHtml(html, maxCols, maxRows) { - if (maxCols || maxRows) { - const batches = [] - const rows = html.length / maxCols - - for (let i = 0; i < rows; i += 1) { - if (i < maxRows / 2 || i > rows - maxRows / 2) { - batches.push(html.slice(i * maxCols, (i + 1) * maxCols)) - } - } - - html = batches.join('\n') - } - - return html +function analyzeJs(js) { + return Array.prototype.concat.apply( + [], + js.map(({ name, chain, value }) => + analyzeManyToMany( + Wappalyzer.technologies.find(({ name: _name }) => name === _name), + 'js', + { [chain]: [value] } + ) + ) + ) } class Driver { @@ -236,8 +201,6 @@ class Site { this.listeners = {} - this.headers = {} - this.pages = [] } @@ -322,23 +285,28 @@ class Site { status: response.status() } - const headers = response.headers() + const rawHeaders = response.headers() + const headers = {} - Object.keys(headers).forEach((key) => { - this.headers[key] = [ - ...(this.headers[key] || []), - ...(Array.isArray(headers[key]) ? headers[key] : [headers[key]]) + Object.keys(rawHeaders).forEach((key) => { + headers[key] = [ + ...(headers[key] || []), + ...(Array.isArray(rawHeaders[key]) + ? rawHeaders[key] + : [rawHeaders[key]]) ] }) this.contentType = headers['content-type'] || null if (response.status() >= 300 && response.status() < 400) { - if (this.headers.location) { - url = new URL(this.headers.location.slice(-1), url) + if (headers.location) { + url = new URL(headers.location.slice(-1), url) } } else { responseReceived = true + + this.onDetect(analyze(url, { headers })) } } } catch (error) { @@ -346,9 +314,10 @@ class Site { } }) - if (this.options.userAgent) { - await page.setUserAgent(this.options.userAgent) - } + await page.setUserAgent( + this.options.userAgent || + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36' + ) try { await Promise.race([ @@ -363,6 +332,7 @@ class Site { await sleep(1000) + // Links const links = await ( await page.evaluateHandle(() => Array.from(document.getElementsByTagName('a')).map( @@ -378,7 +348,7 @@ class Site { ) ).jsonValue() - // eslint-disable-next-line no-undef + // Script tags const scripts = ( await ( await page.evaluateHandle(() => @@ -389,9 +359,40 @@ class Site { ).jsonValue() ).filter((script) => script) - // const js = processJs(await page.evaluate(getJs), this.wappalyzer.jsPatterns) - // TODO + // JavaScript + const win = await page.evaluate(getJs) + + const js = Wappalyzer.technologies + .filter(({ js }) => Object.keys(js).length) + .map(({ name, js }) => ({ name, chains: Object.keys(js) })) + .reduce((technologies, { name, chains }) => { + chains.forEach((chain) => { + const value = chain + .split('.') + .reduce( + (value, method) => + value && value.hasOwnProperty(method) + ? value[method] + : undefined, + win + ) + + if (typeof value !== 'undefined') { + technologies.push({ + name, + chain, + value: + typeof value === 'string' || typeof value === 'number' + ? value + : !!value + }) + } + }) + return technologies + }, []) + + // Cookies const cookies = (await page.cookies()).map( ({ name, value, domain, path }) => ({ name, @@ -401,11 +402,29 @@ class Site { }) ) - const html = processHtml( - await page.content(), - this.options.htmlMaxCols, - this.options.htmlMaxRows - ) + // HTML + let html = await page.content() + + if (this.options.htmlMaxCols && this.options.htmlMaxRows) { + const batches = [] + const rows = html.length / this.options.htmlMaxCols + + for (let i = 0; i < rows; i += 1) { + if ( + i < this.options.htmlMaxRows / 2 || + i > rows - this.options.htmlMaxRows / 2 + ) { + batches.push( + html.slice( + i * this.options.htmlMaxCols, + (i + 1) * this.options.htmlMaxCols + ) + ) + } + } + + html = batches.join('\n') + } // Validate response if (!this.analyzedUrls[url.href].status) { @@ -441,11 +460,12 @@ class Site { } } - await this.onDetect( + this.onDetect(url, analyzeJs(js)) + + this.onDetect( url, - await analyze(url, { + analyze(url, { cookies, - headers: this.headers, html, scripts }) diff --git a/src/drivers/webextension/js/content.js b/src/drivers/webextension/js/content.js index a1deeb4a6..e4a126a22 100644 --- a/src/drivers/webextension/js/content.js +++ b/src/drivers/webextension/js/content.js @@ -96,7 +96,6 @@ const Content = { wappalyzer: { technologies: technologies .filter(({ js }) => Object.keys(js).length) - .filter(({ name }) => name === 'jQuery') .map(({ name, js }) => ({ name, chains: Object.keys(js) })) } }) diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index 60c0f978b..905598097 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -118,13 +118,11 @@ const Driver = { url, Array.prototype.concat.apply( [], - await Promise.all( - js.map(({ name, chain, value }) => - analyzeManyToMany( - Wappalyzer.technologies.find(({ name: _name }) => name === _name), - 'js', - { [chain]: [value] } - ) + js.map(({ name, chain, value }) => + analyzeManyToMany( + Wappalyzer.technologies.find(({ name: _name }) => name === _name), + 'js', + { [chain]: [value] } ) ) ) @@ -178,10 +176,7 @@ const Driver = { headers['content-type'] && /\/x?html/.test(headers['content-type'][0]) ) { - await Driver.onDetect( - url, - await analyze(url.href, { headers }, { tab }) - ) + await Driver.onDetect(url, analyze(url.href, { headers }, { tab })) } } } catch (error) { @@ -198,7 +193,7 @@ const Driver = { domain: `.${url.hostname}` }) - await Driver.onDetect(url, await analyze(href, items), language, true) + await Driver.onDetect(url, analyze(href, items), language, true) } catch (error) { Driver.error(error) } diff --git a/src/drivers/webextension/js/inject.js b/src/drivers/webextension/js/inject.js index b0807124e..8ae4bd77e 100644 --- a/src/drivers/webextension/js/inject.js +++ b/src/drivers/webextension/js/inject.js @@ -13,7 +13,7 @@ postMessage({ wappalyzer: { - js: technologies.reduce((results, { name, chains }) => { + js: technologies.reduce((technologies, { name, chains }) => { chains.forEach((chain) => { const value = chain .split('.') @@ -25,14 +25,16 @@ window ) - technologies.push({ - name, - chain, - value: - typeof value === 'string' || typeof value === 'number' - ? value - : !!value - }) + if (value !== undefined) { + technologies.push({ + name, + chain, + value: + typeof value === 'string' || typeof value === 'number' + ? value + : !!value + }) + } }) return technologies diff --git a/src/wappalyzer.js b/src/wappalyzer.js index a33048baa..7da3253a7 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -145,7 +145,7 @@ const Wappalyzer = { } }, - async analyze(url, { html, meta, headers, cookies, scripts }) { + analyze(url, { html, meta, headers, cookies, scripts }) { const oo = Wappalyzer.analyzeOneToOne const om = Wappalyzer.analyzeOneToMany const mm = Wappalyzer.analyzeManyToMany @@ -154,19 +154,15 @@ const Wappalyzer = { try { const detections = flatten( - flatten( - await Promise.all( - Wappalyzer.technologies.map((technology) => - Promise.all([ - oo(technology, 'url', url), - oo(technology, 'html', html), - om(technology, 'meta', meta), - mm(technology, 'headers', headers), - om(technology, 'cookies', cookies), - om(technology, 'scripts', scripts) - ]) - ) - ) + Wappalyzer.technologies.map((technology) => + flatten([ + oo(technology, 'url', url), + oo(technology, 'html', html), + om(technology, 'meta', meta), + mm(technology, 'headers', headers), + om(technology, 'cookies', cookies), + om(technology, 'scripts', scripts) + ]) ) ).filter((technology) => technology)