From 65c2ce86dcb4d4ceea9cf4ceebc28f84fc8f75a0 Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Thu, 11 Jun 2020 12:57:35 +1000 Subject: [PATCH] Build v6.0.7 --- bin/build | 1 + src/apps.json | 2 +- src/drivers/npm/README.md | 13 +- src/drivers/npm/driver.js | 51 +++-- src/drivers/webextension/js/content.js | 18 +- src/drivers/webextension/js/driver.js | 40 ++-- src/wappalyzer.js | 28 +-- src/wappalyzer.spec.js | 273 ------------------------- 8 files changed, 86 insertions(+), 340 deletions(-) delete mode 100644 src/wappalyzer.spec.js diff --git a/bin/build b/bin/build index 527c46cde..10e2145e4 100755 --- a/bin/build +++ b/bin/build @@ -39,6 +39,7 @@ set -e # NPM sed -i "s/\"version\": \"[^\"]*\"/\"version\": \"$version\"/" src/drivers/npm/package.json +sed -i "s/\"version\": \"[^\"]*\"/\"version\": \"$version\"/" src/package.json # WebExtension diff --git a/src/apps.json b/src/apps.json index cdf02642f..5a9459464 100644 --- a/src/apps.json +++ b/src/apps.json @@ -4225,7 +4225,7 @@ }, "icon": "GitHub.svg", "implies": "Ruby on Rails", - "url": "^https?://[^/]+\\.github\\.io/", + "url": "^https?://[^/]+\\.github\\.io", "website": "https://pages.github.com/" }, "GitLab": { diff --git a/src/drivers/npm/README.md b/src/drivers/npm/README.md index 495847b28..5eda7d416 100644 --- a/src/drivers/npm/README.md +++ b/src/drivers/npm/README.md @@ -2,22 +2,23 @@ [Wappalyzer](https://www.wappalyzer.com/) indentifies technologies on websites. +*Note:* The [wappalyzer-core](https://www.npmjs.com/package/wappalyzer-core) package provides a low-level API without dependencies. -## CLI +## Command line -## Installation +### Installation ```shell $ npm i -g wappalyzer ``` -## Usage +### Usage ``` wappalyzer [options] ``` -### Options +#### Options ``` -b, --batch-size=... Process links in batches @@ -37,13 +38,13 @@ wappalyzer [options] ## Dependency -## Installation +### Installation ```shell $ npm i wappalyzer ``` -## Usage +### Usage ```javascript const Wappalyzer = require('wappalyzer'); diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 266b63b05..4ad7143bb 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -306,7 +306,7 @@ class Site { } else { responseReceived = true - this.onDetect(analyze(url, { headers })) + this.onDetect(analyze({ headers })) } } } catch (error) { @@ -349,15 +349,28 @@ class Site { ).jsonValue() // Script tags - const scripts = ( - await ( - await page.evaluateHandle(() => - Array.from(document.getElementsByTagName('script')).map( - ({ src }) => src - ) - ) - ).jsonValue() - ).filter((script) => script) + const scripts = await ( + await page.evaluateHandle(() => + Array.from(document.getElementsByTagName('script')) + .map(({ src }) => src) + .filter((src) => src) + ) + ).jsonValue() + + // Meta tags + const meta = await ( + await page.evaluateHandle(() => + Array.from(document.querySelectorAll('meta')).reduce((metas, meta) => { + const key = meta.getAttribute('name') || meta.getAttribute('property') + + if (key) { + metas[key.toLowerCase()] = [meta.getAttribute('content')] + } + + return metas + }, {}) + ) + ).jsonValue() // JavaScript const win = await page.evaluate(getJs) @@ -393,13 +406,12 @@ class Site { }, []) // Cookies - const cookies = (await page.cookies()).map( - ({ name, value, domain, path }) => ({ - name, - value, - domain, - path - }) + const cookies = (await page.cookies()).reduce( + (cookies, { name, value }) => ({ + ...cookies, + [name]: [value] + }), + {} ) // HTML @@ -464,10 +476,11 @@ class Site { this.onDetect( url, - analyze(url, { + analyze({ cookies, html, - scripts + scripts, + meta }) ) diff --git a/src/drivers/webextension/js/content.js b/src/drivers/webextension/js/content.js index e4a126a22..be689bceb 100644 --- a/src/drivers/webextension/js/content.js +++ b/src/drivers/webextension/js/content.js @@ -53,12 +53,18 @@ const Content = { .filter((script) => script.indexOf('data:text/javascript;') !== 0) // Meta tags - const meta = Array.from(document.querySelectorAll('meta')) - .map((meta) => ({ - key: meta.getAttribute('name') || meta.getAttribute('property'), - value: meta.getAttribute('content') - })) - .filter(({ value }) => value) + const meta = Array.from(document.querySelectorAll('meta')).reduce( + (metas, meta) => { + const key = meta.getAttribute('name') || meta.getAttribute('property') + + if (key) { + metas[key.toLowerCase()] = [meta.getAttribute('content')] + } + + return metas + }, + {} + ) Content.port.postMessage({ func: 'onContentLoad', diff --git a/src/drivers/webextension/js/driver.js b/src/drivers/webextension/js/driver.js index 905598097..7369600db 100644 --- a/src/drivers/webextension/js/driver.js +++ b/src/drivers/webextension/js/driver.js @@ -68,7 +68,7 @@ const Driver = { if (previous === null) { Driver.open('https://www.wappalyzer.com/installed') } else if (version !== previous && upgradeMessage) { - // Driver.open(`https://www.wappalyzer.com/upgraded?v${version}`, false) + Driver.open(`https://www.wappalyzer.com/upgraded?v${version}`, false) } await setOption('version', version) @@ -111,9 +111,7 @@ const Driver = { } }, - async analyzeJs(href, js) { - const url = new URL(href) - + async analyzeJs(url, js) { await Driver.onDetect( url, Array.prototype.concat.apply( @@ -157,9 +155,9 @@ const Driver = { const headers = {} try { - const url = new URL(request.url) - - const [tab] = await promisify(chrome.tabs, 'query', { url: [url.href] }) + const [tab] = await promisify(chrome.tabs, 'query', { + url: [request.url] + }) if (tab) { request.responseHeaders.forEach((header) => { @@ -176,7 +174,7 @@ const Driver = { headers['content-type'] && /\/x?html/.test(headers['content-type'][0]) ) { - await Driver.onDetect(url, analyze(url.href, { headers }, { tab })) + await Driver.onDetect(request.url, analyze({ headers })) } } } catch (error) { @@ -185,15 +183,23 @@ const Driver = { } }, - async onContentLoad(href, items, language) { + async onContentLoad(url, items, language) { try { - const url = new URL(href) + const { hostname } = new URL(url) - items.cookies = await promisify(chrome.cookies, 'getAll', { - domain: `.${url.hostname}` - }) + items.cookies = ( + await promisify(chrome.cookies, 'getAll', { + domain: `.${hostname}` + }) + ).reduce( + (cookies, { name, value }) => ({ + ...cookies, + [name]: [value] + }), + {} + ) - await Driver.onDetect(url, analyze(href, items), language, true) + await Driver.onDetect(url, analyze({ url, ...items }), language, true) } catch (error) { Driver.error(error) } @@ -208,7 +214,7 @@ const Driver = { return } - const { hostname, href } = url + const { hostname } = new URL(url) // Cache detections const cache = (Driver.cache.hostnames[hostname] = { @@ -278,7 +284,7 @@ const Driver = { await Driver.setIcon(url, resolved) - const tabs = await promisify(chrome.tabs, 'query', { url: [href] }) + const tabs = await promisify(chrome.tabs, 'query', { url }) tabs.forEach(({ id }) => (Driver.cache.tabs[id] = resolved)) @@ -314,7 +320,7 @@ const Driver = { })[0] || { icon }) } - const tabs = await promisify(chrome.tabs, 'query', { url: [url.href] }) + const tabs = await promisify(chrome.tabs, 'query', { url }) await Promise.all( tabs.map(async ({ id: tabId }) => { diff --git a/src/wappalyzer.js b/src/wappalyzer.js index 7da3253a7..dbbc74ebc 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -110,7 +110,7 @@ const Wappalyzer = { const index = resolved.findIndex(({ name }) => name === excluded.name) - if (index === -1) { + if (index !== -1) { resolved.splice(index, 1) } }) @@ -145,7 +145,7 @@ const Wappalyzer = { } }, - analyze(url, { html, meta, headers, cookies, scripts }) { + analyze({ url, html, meta, headers, cookies, scripts }) { const oo = Wappalyzer.analyzeOneToOne const om = Wappalyzer.analyzeOneToMany const mm = Wappalyzer.analyzeManyToMany @@ -158,10 +158,10 @@ const Wappalyzer = { flatten([ oo(technology, 'url', url), oo(technology, 'html', html), - om(technology, 'meta', meta), - mm(technology, 'headers', headers), - om(technology, 'cookies', cookies), - om(technology, 'scripts', scripts) + om(technology, 'scripts', scripts), + mm(technology, 'cookies', cookies), + mm(technology, 'meta', meta), + mm(technology, 'headers', headers) ]) ) ).filter((technology) => technology) @@ -196,15 +196,7 @@ const Wappalyzer = { categories: cats || [], slug: Wappalyzer.slugify(name), url: transform(url), - headers: transform( - Object.keys(headers || {}).reduce( - (lcHeaders, header) => ({ - ...lcHeaders, - [header.toLowerCase()]: headers[header] - }), - {} - ) - ), + headers: transform(headers), cookies: transform(cookies), html: transform(html), meta: transform(meta), @@ -248,7 +240,7 @@ const Wappalyzer = { } const parsed = Object.keys(patterns).reduce((parsed, key) => { - parsed[key] = toArray(patterns[key]).map((pattern) => { + parsed[key.toLowerCase()] = toArray(patterns[key]).map((pattern) => { const { regex, confidence, version } = pattern .split('\\;') .reduce((attrs, attr, i) => { @@ -295,8 +287,8 @@ const Wappalyzer = { }, analyzeOneToMany(technology, type, items = []) { - return items.reduce((technologies, { key, value }) => { - const patterns = technology[type][key] || [] + return items.reduce((technologies, value) => { + const patterns = technology[type] || [] patterns.forEach((pattern) => { if (pattern.regex.test(value)) { diff --git a/src/wappalyzer.spec.js b/src/wappalyzer.spec.js deleted file mode 100644 index 0df917d63..000000000 --- a/src/wappalyzer.spec.js +++ /dev/null @@ -1,273 +0,0 @@ -/* eslint-env mocha */ - -const { assert, expect } = require('chai') -const Wappalyzer = require('../src/wappalyzer') - -const appsJson = { - appUrl: { - url: 'test' - }, - appCookies: { - cookies: { - test: 'test' - } - }, - appUppercaseCookies: { - cookies: { - Test: 'Test' - } - }, - appHeaders: { - headers: { - 'X-Powered-By': 'test' - } - }, - appHtml: { - html: 'test v(\\d)\\;confidence:50\\;version:\\1', - implies: 'appImplies', - excludes: 'appExcludes' - }, - appMeta: { - meta: { - generator: 'test' - } - }, - appScript: { - script: 'test' - }, - appJs: { - js: { - key: 'value' - } - }, - appImplies: {}, - appExcludes: { - html: 'test' - } -} - -const driverData = { - cookies: [ - { - name: 'test', - value: 'test', - domain: '', - path: '' - } - ], - headers: { - 'x-powered-by': ['test'] - }, - html: ' html test v1', - scripts: ['test'], - js: { - appJs: { - key: ['value'] - } - } -} - -describe('Wappalyzer', () => { - describe('#analyze()', () => { - let apps - - before(async () => { - const wappalyzer = new Wappalyzer() - - wappalyzer.apps = appsJson - - wappalyzer.parseJsPatterns() - - wappalyzer.driver.displayApps = (detected) => { - apps = detected - } - - await wappalyzer.analyze({ canonical: 'test' }, driverData) - }) - - it('should identify technologies using URLs', () => { - expect(apps).to.have.any.keys('appUrl') - }) - - it('should identify technologies using HTML', () => { - expect(apps).to.have.any.keys('appHtml') - }) - - it('should identify technologies using meta tags', () => { - expect(apps).to.have.any.keys('appMeta') - }) - - it('should identify technologies using script URLs', () => { - expect(apps).to.have.any.keys('appScript') - }) - - it('should identify technologies using headers', () => { - expect(apps).to.have.any.keys('appHeaders') - }) - - it('should identify technologies using cookies', () => { - expect(apps).to.have.any.keys('appCookies') - }) - - it('should identify technologies using uppercase named cookies', () => { - expect(apps).to.have.any.keys('appUppercaseCookies') - }) - - it('should identify technologies using JavaScript', () => { - expect(apps).to.have.any.keys('appJs') - }) - - it('should return the implied technology', () => { - expect(apps).to.have.any.keys('appImplies') - }) - - it('should not return the excluded technology', () => { - expect(apps).to.not.have.any.keys('appExcludes') - }) - - it('should return the confidence value', () => { - assert.equal(apps.appHtml.confidenceTotal, 50) - }) - - it('should return the version number', () => { - assert.equal(apps.appHtml.version, '1') - }) - - it('should analyze html', async () => { - const html = ` - - - - Page title | Html detection - - - -

Technologies Test Page | Html detection

- - - - - - ` - const wappalyzer = new Wappalyzer() - wappalyzer.apps = { - 'Google Tag Manager': { - html: [ - 'googletagmanager\\.com/ns\\.html[^>]+>', - '' - ] - } - } - let applications = null - wappalyzer.driver = { - log() {}, - displayApps(detectedMap) { - applications = detectedMap - } - } - - await wappalyzer.analyze({ canonical: 'example.com' }, { html }) - assert.equal( - applications['Google Tag Manager'].name, - 'Google Tag Manager' - ) - }) - - it('should analyze scripts', async () => { - const scripts = [ - 'http://www.google-analytics.com/analytics.js', - 'http://example.com/assets/js/jquery.min.js' - ] - const wappalyzer = new Wappalyzer() - wappalyzer.apps = { - 'Google Analytics': { - cats: [10], - script: - 'google-analytics\\.com\\/(?:ga|urchin|(analytics))\\.js\\;version:\\1?UA:' - }, - jQuery: { - script: [ - 'jquery(?:\\-|\\.)([\\d.]*\\d)[^/]*\\.js\\;version:\\1', - '/([\\d.]+)/jquery(?:\\.min)?\\.js\\;version:\\1', - 'jquery.*\\.js(?:\\?ver(?:sion)?=([\\d.]+))?\\;version:\\1' - ] - } - } - let applications = null - wappalyzer.driver = { - log() {}, - displayApps(detectedMap) { - applications = detectedMap - } - } - - await wappalyzer.analyze({ canonical: 'example.com' }, { scripts }) - assert.equal(applications['Google Analytics'].name, 'Google Analytics') - assert.equal(applications.jQuery.name, 'jQuery') - }) - - it('should analyze headers', async () => { - const headers = { - date: ['Thu, 01 Feb 2018 11:34:18 GMT'], - connection: ['keep-alive'], - 'x-powered-by': ['Express'], - etag: ['W/125-1jQLmiya7mfec43xR3Eb3pjdu64s'], - 'content-length': ['293'], - 'content-type': ['text/html; charset=utf-8'] - } - const wappalyzer = new Wappalyzer() - wappalyzer.apps = { - Express: { - headers: { - 'X-Powered-By': '^Express$' - } - } - } - let applications = null - wappalyzer.driver = { - log() {}, - displayApps(detectedMap) { - applications = detectedMap - } - } - - await wappalyzer.analyze({ canonical: 'example.com' }, { headers }) - assert.equal(applications.Express.name, 'Express') - }) - - it('should analyze js globals', async () => { - const js = { - 'Moment.js': { moment: { '0': true } }, - 'Google Font API': { WebFonts: { '0': true } } - } - const wappalyzer = new Wappalyzer() - wappalyzer.apps = { - 'Moment.js': { - js: { - moment: '', - 'moment.version': '(.*)\\;version:\\1' - } - }, - 'Google Font API': { - js: { - WebFonts: '' - } - } - } - let applications = null - wappalyzer.driver = { - log() {}, - displayApps(detectedMap) { - applications = detectedMap - } - } - - wappalyzer.parseJsPatterns() - await wappalyzer.analyze({ canonical: 'example.com' }, { js }) - - assert.equal(applications['Google Font API'].name, 'Google Font API') - assert.equal(applications['Moment.js'].name, 'Moment.js') - }) - }) -})