From 15c2ca8ca39ce6f49626fa0c61305b781f46737a Mon Sep 17 00:00:00 2001
From: Elbert Alias <77259+AliasIO@users.noreply.github.com>
Date: Wed, 28 Dec 2022 10:28:16 +1100
Subject: [PATCH] Implement extended probe feature
---
README.md | 11 ++++
src/drivers/npm/cli.js | 36 ++++++-------
src/drivers/npm/driver.js | 46 ++++++++++++++---
src/technologies/m.json | 4 +-
src/technologies/s.json | 7 ++-
src/wappalyzer.js | 106 ++++++++++++++++++--------------------
6 files changed, 125 insertions(+), 85 deletions(-)
diff --git a/README.md b/README.md
index 6204affb3..177419a94 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,9 @@ Patterns (regular expressions) are kept in [`src/technologies/`](https://github.
"meta": {
"generator": "(?:Example|Another Example)"
},
+ "probe": {
+ "/path": ""
+ },
"scriptSrc": "example-([0-9.]+)\\.js\\;confidence:50\\;version:\\1",
"scripts": "function webpackJsonpCallback\\(data\\) {",
"url": "example\\.com",
@@ -367,6 +370,14 @@ Plus any of:
"\\.example-class" |
+
+ probe |
+ Object |
+
+ Request a URL to test for its existence or match text content (NPM driver only).
+ |
+ { "/path": "Example text" } |
+
robots |
String | Array |
diff --git a/src/drivers/npm/cli.js b/src/drivers/npm/cli.js
index bda0ad5ed..36dc896bb 100755
--- a/src/drivers/npm/cli.js
+++ b/src/drivers/npm/cli.js
@@ -72,24 +72,24 @@ Examples:
docker wappalyzer/cli https://www.example.com --pretty
Options:
- -b, --batch-size=... Process links in batches
- -d, --debug Output debug messages
- -t, --delay=ms Wait for ms milliseconds between requests
- -h, --help This text
- -H, --header Extra header to send with requests
- --html-max-cols=... Limit the number of HTML characters per line processed
- --html-max-rows=... Limit the number of HTML lines processed
- -D, --max-depth=... Don't analyse pages more than num levels deep
- -m, --max-urls=... Exit when num URLs have been analysed
- -w, --max-wait=... Wait no more than ms milliseconds for page resources to load
- -p, --probe Perform a deeper scan by performing additional requests and inspecting DNS records
- -P, --pretty Pretty-print JSON output
- --proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080'
- -r, --recursive Follow links on pages (crawler)
- -a, --user-agent=... Set the user agent string
- -n, --no-scripts Disabled JavaScript on web pages
- -N, --no-redirect Disable cross-domain redirects
- -e, --extended Output additional information
+ -b, --batch-size=... Process links in batches
+ -d, --debug Output debug messages
+ -t, --delay=ms Wait for ms milliseconds between requests
+ -h, --help This text
+ -H, --header Extra header to send with requests
+ --html-max-cols=... Limit the number of HTML characters per line processed
+ --html-max-rows=... Limit the number of HTML lines processed
+ -D, --max-depth=... Don't analyse pages more than num levels deep
+ -m, --max-urls=... Exit when num URLs have been analysed
+ -w, --max-wait=... Wait no more than ms milliseconds for page resources to load
+ -p, --probe=[basic|full] Perform a deeper scan by performing additional requests and inspecting DNS records
+ -P, --pretty Pretty-print JSON output
+ --proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080'
+ -r, --recursive Follow links on pages (crawler)
+ -a, --user-agent=... Set the user agent string
+ -n, --no-scripts Disabled JavaScript on web pages
+ -N, --no-redirect Disable cross-domain redirects
+ -e, --extended Output additional information
`)
process.exit(options.help ? 0 : 1)
}
diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js
index 5f0e73602..bd84ef2a7 100644
--- a/src/drivers/npm/driver.js
+++ b/src/drivers/npm/driver.js
@@ -289,7 +289,7 @@ function get(url, options = {}) {
},
},
(response) => {
- if (response.statusCode >= 400) {
+ if (response.statusCode >= 300) {
return reject(
new Error(`${response.statusCode} ${response.statusMessage}`)
)
@@ -337,7 +337,12 @@ class Driver {
this.options.debug = Boolean(+this.options.debug)
this.options.recursive = Boolean(+this.options.recursive)
- this.options.probe = Boolean(+this.options.probe)
+ this.options.probe =
+ String(this.options.probe || '').toLowerCase() === 'basic'
+ ? 'basic'
+ : String(this.options.probe || '').toLowerCase() === 'full'
+ ? 'full'
+ : Boolean(+this.options.probe) && 'full'
this.options.delay = parseInt(this.options.delay, 10)
this.options.maxDepth = parseInt(this.options.maxDepth, 10)
this.options.maxUrls = parseInt(this.options.maxUrls, 10)
@@ -1153,8 +1158,25 @@ class Site {
}
async probe(url) {
- const files = {
- robots: '/robots.txt',
+ const paths = [
+ {
+ type: 'robots',
+ path: '/robots.txt',
+ },
+ ]
+
+ if (this.options.probe === 'full') {
+ Wappalyzer.technologies
+ .filter(({ probe }) => Object.keys(probe).length)
+ .forEach((technology) => {
+ paths.push(
+ ...Object.keys(technology.probe).map((path) => ({
+ type: 'probe',
+ path,
+ technology,
+ }))
+ )
+ })
}
// DNS
@@ -1180,9 +1202,7 @@ class Site {
await Promise.allSettled([
// Static files
- ...Object.keys(files).map(async (file, index) => {
- const path = files[file]
-
+ ...paths.map(async ({ type, path, technology }, index) => {
try {
await sleep(this.options.delay * index)
@@ -1193,7 +1213,17 @@ class Site {
this.log(`Probe ok (${path})`)
- await this.onDetect(url, analyze({ [file]: body.slice(0, 100000) }))
+ const text = body.slice(0, 100000)
+
+ await this.onDetect(
+ url,
+ analyze(
+ {
+ [type]: path ? { [path]: [text] } : text,
+ },
+ technology && [technology]
+ )
+ )
} catch (error) {
this.error(`Probe failed (${path}): ${error.message || error}`)
}
diff --git a/src/technologies/m.json b/src/technologies/m.json
index 17c801dff..003b4082b 100644
--- a/src/technologies/m.json
+++ b/src/technologies/m.json
@@ -303,7 +303,7 @@
},
"oss": true,
"probe": {
- "/magento_version": "Magento/([0-9.]+)\\;version:\\1"
+ "/magento_version": ""
},
"scriptSrc": [
"js/mage",
@@ -3162,4 +3162,4 @@
],
"website": "https://code.google.com/p/modwsgi"
}
-}
\ No newline at end of file
+}
diff --git a/src/technologies/s.json b/src/technologies/s.json
index 020ebbb04..206244a8f 100644
--- a/src/technologies/s.json
+++ b/src/technologies/s.json
@@ -3470,13 +3470,16 @@
"img[src^='/-/media/']",
"img[src*='/~/media/.+\\.ashx']"
],
+ "probe": {
+ "/layouts/System/VisitorIdentification.aspx": ""
+ },
"icon": "Sitecore.svg",
"pricing": [
"poa",
"recurring",
"high"
],
- "requires": "Microsoft ASP.NET",
+ "implies": "Microsoft ASP.NET",
"saas": true,
"website": "https://www.sitecore.com/"
},
@@ -6698,4 +6701,4 @@
},
"website": "https://styled-components.com"
}
-}
\ No newline at end of file
+}
diff --git a/src/wappalyzer.js b/src/wappalyzer.js
index 68350b086..7c168ea64 100644
--- a/src/wappalyzer.js
+++ b/src/wappalyzer.js
@@ -298,19 +298,20 @@ const Wappalyzer = {
const mm = Wappalyzer.analyzeManyToMany
const relations = {
- url: oo,
- xhr: oo,
- html: oo,
- text: oo,
- scripts: oo,
- css: oo,
- robots: oo,
certIssuer: oo,
- scriptSrc: om,
cookies: mm,
- meta: mm,
- headers: mm,
+ css: oo,
dns: mm,
+ headers: mm,
+ html: oo,
+ meta: mm,
+ probe: mm,
+ robots: oo,
+ scriptSrc: om,
+ scripts: oo,
+ text: oo,
+ url: oo,
+ xhr: oo,
}
try {
@@ -344,82 +345,77 @@ const Wappalyzer = {
Wappalyzer.technologies = Object.keys(data).reduce((technologies, name) => {
const {
cats,
+ certIssuer,
+ cookies,
+ cpe,
+ css,
description,
- url,
- xhr,
+ dns,
dom,
- html,
- text,
- scripts,
- css,
- robots,
- meta,
+ excludes,
headers,
- dns,
- certIssuer,
- cookies,
- scriptSrc,
- js,
+ html,
+ icon,
implies,
- excludes,
+ js,
+ meta,
+ pricing,
+ probe,
requires,
requiresCategory,
- icon,
+ robots,
+ scriptSrc,
+ scripts,
+ text,
+ url,
website,
- pricing,
- cpe,
+ xhr,
} = data[name]
technologies.push({
- name,
- description: description || null,
categories: cats || [],
- slug: Wappalyzer.slugify(name),
- url: transform(url),
- xhr: transform(xhr),
- headers: transform(headers),
- dns: transform(dns),
+ certIssuer: transform(certIssuer),
cookies: transform(cookies),
+ cpe: cpe || null,
+ css: transform(css),
+ description: description || null,
+ dns: transform(dns),
dom: transform(
typeof dom === 'string' || Array.isArray(dom)
? toArray(dom).reduce(
- (dom, selector) => ({
- ...dom,
- [selector]: { exists: '' },
- }),
+ (dom, selector) => ({ ...dom, [selector]: { exists: '' } }),
{}
)
: dom,
true,
false
),
+ excludes: transform(excludes).map(({ value }) => ({ name: value })),
+ headers: transform(headers),
html: transform(html),
- text: transform(text),
- scripts: transform(scripts),
- css: transform(css),
- certIssuer: transform(certIssuer),
- robots: transform(robots),
- meta: transform(meta),
- scriptSrc: transform(scriptSrc),
- js: transform(js, true),
+ icon: icon || 'default.svg',
implies: transform(implies).map(({ value, confidence, version }) => ({
name: value,
confidence,
version,
})),
- excludes: transform(excludes).map(({ value }) => ({
- name: value,
- })),
- requires: transform(requires).map(({ value }) => ({
- name: value,
- })),
+ js: transform(js, true),
+ meta: transform(meta),
+ name,
+ pricing: pricing || [],
+ probe: transform(probe, true),
+ requires: transform(requires).map(({ value }) => ({ name: value })),
requiresCategory: transform(requiresCategory).map(({ value }) => ({
id: value,
})),
- icon: icon || 'default.svg',
+ robots: transform(robots),
+ scriptSrc: transform(scriptSrc),
+ scripts: transform(scripts),
+ slug: Wappalyzer.slugify(name),
+ text: transform(text),
+ url: transform(url),
website: website || null,
- pricing: pricing || [],
- cpe: cpe || null,
+ xhr: transform(xhr),
})
return technologies