Implement extended probe feature

main
Elbert Alias 2 years ago
parent 2b881c7244
commit 15c2ca8ca3

@ -100,6 +100,9 @@ Patterns (regular expressions) are kept in [`src/technologies/`](https://github.
"meta": {
"generator": "(?:Example|Another Example)"
},
"probe": {
"/path": ""
},
"scriptSrc": "example-([0-9.]+)\\.js\\;confidence:50\\;version:\\1",
"scripts": "function webpackJsonpCallback\\(data\\) {",
"url": "example\\.com",
@ -367,6 +370,14 @@ Plus any of:
</td>
<td><code>"\\.example-class"</code></td>
</tr>
<tr>
<td><code>probe</code></td>
<td>Object</td>
<td>
Request a URL to test for its existence or match its text content (NPM driver only).
</td>
<td><code>{ "/path": "Example text" }</code></td>
</tr>
<tr>
<td><code>robots</code></td>
<td>String | Array</td>

@ -72,24 +72,24 @@ Examples:
docker wappalyzer/cli https://www.example.com --pretty
Options:
-b, --batch-size=... Process links in batches
-d, --debug Output debug messages
-t, --delay=ms Wait for ms milliseconds between requests
-h, --help This text
-H, --header Extra header to send with requests
--html-max-cols=... Limit the number of HTML characters per line processed
--html-max-rows=... Limit the number of HTML lines processed
-D, --max-depth=... Don't analyse pages more than num levels deep
-m, --max-urls=... Exit when num URLs have been analysed
-w, --max-wait=... Wait no more than ms milliseconds for page resources to load
-p, --probe Perform a deeper scan by performing additional requests and inspecting DNS records
-P, --pretty Pretty-print JSON output
--proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080'
-r, --recursive Follow links on pages (crawler)
-a, --user-agent=... Set the user agent string
-n, --no-scripts Disabled JavaScript on web pages
-N, --no-redirect Disable cross-domain redirects
-e, --extended Output additional information
-b, --batch-size=... Process links in batches
-d, --debug Output debug messages
-t, --delay=ms Wait for ms milliseconds between requests
-h, --help This text
-H, --header Extra header to send with requests
--html-max-cols=... Limit the number of HTML characters per line processed
--html-max-rows=... Limit the number of HTML lines processed
-D, --max-depth=... Don't analyse pages more than num levels deep
-m, --max-urls=... Exit when num URLs have been analysed
-w, --max-wait=... Wait no more than ms milliseconds for page resources to load
-p, --probe=[basic|full] Perform a deeper scan by performing additional requests and inspecting DNS records
-P, --pretty Pretty-print JSON output
--proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080'
-r, --recursive Follow links on pages (crawler)
-a, --user-agent=... Set the user agent string
-n, --no-scripts Disabled JavaScript on web pages
-N, --no-redirect Disable cross-domain redirects
-e, --extended Output additional information
`)
process.exit(options.help ? 0 : 1)
}

@ -289,7 +289,7 @@ function get(url, options = {}) {
},
},
(response) => {
if (response.statusCode >= 400) {
if (response.statusCode >= 300) {
return reject(
new Error(`${response.statusCode} ${response.statusMessage}`)
)
@ -337,7 +337,12 @@ class Driver {
this.options.debug = Boolean(+this.options.debug)
this.options.recursive = Boolean(+this.options.recursive)
this.options.probe = Boolean(+this.options.probe)
this.options.probe =
String(this.options.probe || '').toLowerCase() === 'basic'
? 'basic'
: String(this.options.probe || '').toLowerCase() === 'full'
? 'full'
: Boolean(+this.options.probe) && 'full'
this.options.delay = parseInt(this.options.delay, 10)
this.options.maxDepth = parseInt(this.options.maxDepth, 10)
this.options.maxUrls = parseInt(this.options.maxUrls, 10)
@ -1153,8 +1158,25 @@ class Site {
}
async probe(url) {
const files = {
robots: '/robots.txt',
const paths = [
{
type: 'robots',
path: '/robots.txt',
},
]
if (this.options.probe === 'full') {
Wappalyzer.technologies
.filter(({ probe }) => Object.keys(probe).length)
.forEach((technology) => {
paths.push(
...Object.keys(technology.probe).map((path) => ({
type: 'probe',
path,
technology,
}))
)
})
}
// DNS
@ -1180,9 +1202,7 @@ class Site {
await Promise.allSettled([
// Static files
...Object.keys(files).map(async (file, index) => {
const path = files[file]
...paths.map(async ({ type, path, technology }, index) => {
try {
await sleep(this.options.delay * index)
@ -1193,7 +1213,17 @@ class Site {
this.log(`Probe ok (${path})`)
await this.onDetect(url, analyze({ [file]: body.slice(0, 100000) }))
const text = body.slice(0, 100000)
await this.onDetect(
url,
analyze(
{
[type]: path ? { [path]: [text] } : text,
},
technology && [technology]
)
)
} catch (error) {
this.error(`Probe failed (${path}): ${error.message || error}`)
}

@ -303,7 +303,7 @@
},
"oss": true,
"probe": {
"/magento_version": "Magento/([0-9.]+)\\;version:\\1"
"/magento_version": ""
},
"scriptSrc": [
"js/mage",

@ -3470,13 +3470,16 @@
"img[src^='/-/media/']",
"img[src*='/~/media/.+\\.ashx']"
],
"probe": {
"/layouts/System/VisitorIdentification.aspx": ""
},
"icon": "Sitecore.svg",
"pricing": [
"poa",
"recurring",
"high"
],
"requires": "Microsoft ASP.NET",
"implies": "Microsoft ASP.NET",
"saas": true,
"website": "https://www.sitecore.com/"
},

@ -298,19 +298,20 @@ const Wappalyzer = {
const mm = Wappalyzer.analyzeManyToMany
const relations = {
url: oo,
xhr: oo,
html: oo,
text: oo,
scripts: oo,
css: oo,
robots: oo,
certIssuer: oo,
scriptSrc: om,
cookies: mm,
meta: mm,
headers: mm,
css: oo,
dns: mm,
headers: mm,
html: oo,
meta: mm,
probe: mm,
robots: oo,
scriptSrc: om,
scripts: oo,
text: oo,
url: oo,
xhr: oo,
}
try {
@ -344,82 +345,77 @@ const Wappalyzer = {
Wappalyzer.technologies = Object.keys(data).reduce((technologies, name) => {
const {
cats,
certIssuer,
cookies,
cpe,
css,
description,
url,
xhr,
dns,
dom,
html,
text,
scripts,
css,
robots,
meta,
excludes,
headers,
dns,
certIssuer,
cookies,
scriptSrc,
js,
html,
icon,
implies,
excludes,
js,
meta,
pricing,
probe,
requires,
requiresCategory,
icon,
robots,
scriptSrc,
scripts,
text,
url,
website,
pricing,
cpe,
xhr,
} = data[name]
technologies.push({
name,
description: description || null,
categories: cats || [],
slug: Wappalyzer.slugify(name),
url: transform(url),
xhr: transform(xhr),
headers: transform(headers),
dns: transform(dns),
certIssuer: transform(certIssuer),
cookies: transform(cookies),
cpe: cpe || null,
css: transform(css),
description: description || null,
dns: transform(dns),
dom: transform(
typeof dom === 'string' || Array.isArray(dom)
? toArray(dom).reduce(
(dom, selector) => ({
...dom,
[selector]: { exists: '' },
}),
(dom, selector) => ({ ...dom, [selector]: { exists: '' } }),
{}
)
: dom,
true,
false
),
excludes: transform(excludes).map(({ value }) => ({ name: value })),
headers: transform(headers),
html: transform(html),
text: transform(text),
scripts: transform(scripts),
css: transform(css),
certIssuer: transform(certIssuer),
robots: transform(robots),
meta: transform(meta),
scriptSrc: transform(scriptSrc),
js: transform(js, true),
icon: icon || 'default.svg',
implies: transform(implies).map(({ value, confidence, version }) => ({
name: value,
confidence,
version,
})),
excludes: transform(excludes).map(({ value }) => ({
name: value,
})),
requires: transform(requires).map(({ value }) => ({
name: value,
})),
js: transform(js, true),
meta: transform(meta),
name,
pricing: pricing || [],
probe: transform(probe, true),
requires: transform(requires).map(({ value }) => ({ name: value })),
requiresCategory: transform(requiresCategory).map(({ value }) => ({
id: value,
})),
icon: icon || 'default.svg',
robots: transform(robots),
scriptSrc: transform(scriptSrc),
scripts: transform(scripts),
slug: Wappalyzer.slugify(name),
text: transform(text),
url: transform(url),
website: website || null,
pricing: pricing || [],
cpe: cpe || null,
xhr: transform(xhr),
})
return technologies