From 7cd761c2aeeb1cea9bed0659a3d18e72bb2f221e Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Wed, 10 Jun 2020 13:57:34 +1000 Subject: [PATCH] Refactor NPM module --- run | 2 - src/drivers/npm/cli.js | 7 +- src/drivers/npm/driver.js | 180 ++++++++++++++------------ src/drivers/webextension/.gitignore | 1 - src/drivers/webextension/js/driver.js | 6 +- src/drivers/webextension/package.json | 5 - src/drivers/webextension/yarn.lock | 8 -- 7 files changed, 98 insertions(+), 111 deletions(-) delete mode 100644 src/drivers/webextension/package.json delete mode 100644 src/drivers/webextension/yarn.lock diff --git a/run b/run index 0e279eb20..6e8c89a10 100755 --- a/run +++ b/run @@ -11,8 +11,6 @@ fi cmd="docker run --rm -v "$(pwd):/opt/wappalyzer" -it wappalyzer/dev" $cmd sh -c "\ - yarn install; \ - cd src/drivers/webextension; \ yarn install; \ cd ../npm; \ yarn install" diff --git a/src/drivers/npm/cli.js b/src/drivers/npm/cli.js index 9348ce709..2e737ca7e 100755 --- a/src/drivers/npm/cli.js +++ b/src/drivers/npm/cli.js @@ -84,10 +84,6 @@ Options: const site = await wappalyzer.open(url) - site.on('error', (error) => { - process.stderr.write(`page error: ${error}\n`) - }) - const results = await site.analyze() process.stdout.write( @@ -98,7 +94,8 @@ Options: process.exit(0) } catch (error) { - process.stderr.write(error.toString()) + // eslint-disable-next-line no-console + console.error(error) await wappalyzer.destroy() diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index b00d05fc6..e8629382d 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -2,7 +2,13 @@ const { URL } = require('url') const fs = require('fs') const path = require('path') const LanguageDetect = require('languagedetect') -const Wappalyzer = require('./wappalyzer') +const { + setTechnologies, + setCategories, + analyze, + analyzeManyToMany, + resolve +} = require('./wappalyzer') const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env @@ -30,8 +36,6 @@ const languageDetect = new LanguageDetect() languageDetect.setLanguageType('iso2') -const json = JSON.parse(fs.readFileSync(path.resolve(`${__dirname}/apps.json`))) - const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ const errorTypes = { @@ -40,6 +44,13 @@ const errorTypes = { NO_HTML_DOCUMENT: 'No HTML document' } +const { apps: technologies, categories } = JSON.parse( + fs.readFileSync(path.resolve(`${__dirname}/apps.json`)) +) + +setTechnologies(technologies) +setCategories(categories) + function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)) } @@ -188,7 +199,7 @@ class Driver { await this.browser.close() - this.log('Done') + this.log('Browser closed') } catch (error) { throw new Error(error.toString()) } @@ -199,10 +210,10 @@ class Driver { return new Site(url, this) } - log(message, source = 'driver', type = 'debug') { + log(message, source = 'driver') { if (this.options.debug) { // eslint-disable-next-line no-console - console.log(`${type.toUpperCase()} | ${source} | ${message}`) + console.log(`wappalyzer | log | ${source} |`, message) } } } @@ -219,21 +230,9 @@ class Site { throw new Error(error.message || error.toString()) } - this.wappalyzer = new Wappalyzer() - - this.wappalyzer.apps = json.apps - this.wappalyzer.categories = json.categories - - this.wappalyzer.parseJsPatterns() - - this.wappalyzer.driver.log = (message, source, type) => - this.log(message, source, type) - this.wappalyzer.driver.displayApps = (detected, meta, context) => - this.displayApps(detected, meta, context) - this.analyzedUrls = {} - this.technologies = [] - this.meta = {} + this.detections = [] + this.language = '' this.listeners = {} @@ -242,7 +241,18 @@ class Site { this.pages = [] } - async init() {} + log(message, source = 'driver', type = 'log') { + if (this.options.debug) { + // eslint-disable-next-line no-console + console[type](`wappalyzer | ${type} | ${source} |`, message) + } + + this.emit(type, { message, source }) + } + + error(error, source = 'driver') { + this.log(error, source, 'error') + } on(event, callback) { if (!this.listeners[event]) { @@ -258,14 +268,6 @@ class Site { } } - log(...args) { - this.emit('log', ...args) - - this.driver.log(...args) - } - - async fetch(url, index, depth) {} - async goto(url) { // Return when the URL is a duplicate or maxUrls has been reached if ( @@ -293,7 +295,7 @@ class Site { await page.setRequestInterception(true) - page.on('error', (error) => this.emit('error', error)) + page.on('error', (error) => this.error(error)) let responseReceived = false @@ -309,7 +311,7 @@ class Site { request.continue() } } catch (error) { - this.emit('error', error) + this.error(error) } }) @@ -340,7 +342,7 @@ class Site { } } } catch (error) { - this.emit('error', error) + this.error(error) } }) @@ -356,7 +358,7 @@ class Site { ) ]) } catch (error) { - this.emit('error', error) + this.error(error) } await sleep(1000) @@ -387,7 +389,8 @@ class Site { ).jsonValue() ).filter((script) => script) - const js = processJs(await page.evaluate(getJs), this.wappalyzer.jsPatterns) + // const js = processJs(await page.evaluate(getJs), this.wappalyzer.jsPatterns) + // TODO const cookies = (await page.cookies()).map( ({ name, value, domain, path }) => ({ @@ -413,29 +416,40 @@ class Site { throw new Error('NO_RESPONSE') } - let language = null + if (!this.language) { + this.language = await ( + await page.evaluateHandle( + () => + document.documentElement.getAttribute('lang') || + document.documentElement.getAttribute('xml:lang') + ) + ).jsonValue() + } - try { - const [attrs] = languageDetect.detect( - html.replace(/<\/?[^>]+(>|$)/g, ' '), - 1 - ) + if (!this.language) { + try { + const [attrs] = languageDetect.detect( + html.replace(/<\/?[^>]+(>|$)/gs, ' '), + 1 + ) - if (attrs) { - ;[language] = attrs + if (attrs) { + ;[this.language] = attrs + } + } catch (error) { + this.error(error) } - } catch (error) { - this.log(`${error} (${url.href})`, 'driver', 'error') } - await this.wappalyzer.analyze(url, { - cookies, - headers: this.headers, - html, - js, - scripts, - language - }) + await this.onDetect( + url, + await analyze(url, { + cookies, + headers: this.headers, + html, + scripts + }) + ) const reducedLinks = Array.prototype.reduce.call( links, @@ -496,13 +510,30 @@ class Site { } } - this.log(`${message} (${url.href})`, 'driver', 'error') + this.error(error) } return { urls: this.analyzedUrls, - applications: this.technologies, - meta: this.meta + applications: resolve(this.detections).map( + ({ name, confidence, version, icon, website, categories }) => ({ + name, + confidence, + version, + icon, + website, + categories: categories.reduce( + (categories, { id, name }) => ({ + ...categories, + [id]: name + }), + {} + ) + }) + ), + meta: { + language: this.language + } } } @@ -520,34 +551,16 @@ class Site { await this.batch(links, depth, batch + 1) } - displayApps(technologies, meta) { - this.meta = meta - - Object.keys(technologies).forEach((name) => { - const { - confidenceTotal: confidence, - version, - props: { cats, icon, website, cpe } - } = technologies[name] - - const categories = cats.reduce((categories, id) => { - categories[id] = json.categories[id].name + onDetect(url, detections = [], language) { + this.detections = this.detections.concat(detections) - return categories - }, {}) - - if (!this.technologies.some(({ name: _name }) => name === _name)) { - this.technologies.push({ - name, - confidence, - version: version || null, - icon: icon || 'default.svg', - website, - cpe: cpe || null, - categories - }) - } - }) + this.detections.filter( + ({ technology: { name }, pattern: { regex } }, index) => + this.detections.findIndex( + ({ technology: { name: _name }, pattern: { regex: _regex } }) => + name === _name && (!regex || regex.toString() === _regex.toString()) + ) === index + ) } async destroy() { @@ -570,6 +583,3 @@ class Site { } module.exports = Driver - -module.exports.processJs = processJs -module.exports.processHtml = processHtml diff --git a/src/drivers/webextension/.gitignore b/src/drivers/webextension/.gitignore index c49f3131d..32d1fb7e2 100644 --- a/src/drivers/webextension/.gitignore +++ b/src/drivers/webextension/.gitignore @@ -1,6 +1,5 @@ /apps.json /images/icons/converted/* /js/wappalyzer.js -/node_modules !.gitkeep diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index ab45bcc9a..ca5caa7e8 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -80,10 +80,6 @@ const Driver = { console[type](`wappalyzer | ${source} |`, message) }, - warn(message, source = 'driver') { - Driver.log(message, source, 'warn') - }, - error(error, source = 'driver') { Driver.log(error, source, 'error') }, @@ -230,7 +226,7 @@ const Driver = { }) // Remove duplicates - cache.detections = cache.detections = cache.detections.concat(detections) + cache.detections = cache.detections.concat(detections) cache.detections.filter( ({ technology: { name }, pattern: { regex } }, index) => diff --git a/src/drivers/webextension/package.json b/src/drivers/webextension/package.json deleted file mode 100644 index 837c5acf8..000000000 --- a/src/drivers/webextension/package.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "dependencies": { - "webextension-polyfill": "^0.4.0" - } -} diff --git a/src/drivers/webextension/yarn.lock b/src/drivers/webextension/yarn.lock deleted file mode 100644 index 1e20414fe..000000000 --- a/src/drivers/webextension/yarn.lock +++ /dev/null @@ -1,8 +0,0 @@ -# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. -# yarn lockfile v1 - - -webextension-polyfill@^0.4.0: - version "0.4.0" - resolved "https://registry.yarnpkg.com/webextension-polyfill/-/webextension-polyfill-0.4.0.tgz#9cc5a60f0f2bf907a6b349fdd7e61701f54956f9" - integrity sha512-oreMp+EoAo1pzRMigx4jB5jInIpx6NTCySPSjGyLLee/dCIPiRqowCEfbFP8o20wz9SOtNwSsfkaJ9D/tRgpag==