Implement extended probe feature

main
Elbert Alias 2 years ago
parent 2b881c7244
commit 15c2ca8ca3

@ -100,6 +100,9 @@ Patterns (regular expressions) are kept in [`src/technologies/`](https://github.
"meta": { "meta": {
"generator": "(?:Example|Another Example)" "generator": "(?:Example|Another Example)"
}, },
"probe": {
"/path": ""
},
"scriptSrc": "example-([0-9.]+)\\.js\\;confidence:50\\;version:\\1", "scriptSrc": "example-([0-9.]+)\\.js\\;confidence:50\\;version:\\1",
"scripts": "function webpackJsonpCallback\\(data\\) {", "scripts": "function webpackJsonpCallback\\(data\\) {",
"url": "example\\.com", "url": "example\\.com",
@ -367,6 +370,14 @@ Plus any of:
</td> </td>
<td><code>"\\.example-class"</code></td> <td><code>"\\.example-class"</code></td>
</tr> </tr>
<tr>
<td><code>probe</code></td>
<td>Object</td>
<td>
Request a URL to test for its existence or match text content (NPM driver only).
</td>
<td><code>{ "/path": "Example text" }</code></td>
</tr>
<tr> <tr>
<td><code>robots</code></td> <td><code>robots</code></td>
<td>String | Array</td> <td>String | Array</td>

@ -82,7 +82,7 @@ Options:
-D, --max-depth=... Don't analyse pages more than num levels deep -D, --max-depth=... Don't analyse pages more than num levels deep
-m, --max-urls=... Exit when num URLs have been analysed -m, --max-urls=... Exit when num URLs have been analysed
-w, --max-wait=... Wait no more than ms milliseconds for page resources to load -w, --max-wait=... Wait no more than ms milliseconds for page resources to load
-p, --probe Perform a deeper scan by performing additional requests and inspecting DNS records -p, --probe=[basic|full] Perform a deeper scan by performing additional requests and inspecting DNS records
-P, --pretty Pretty-print JSON output -P, --pretty Pretty-print JSON output
--proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080' --proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080'
-r, --recursive Follow links on pages (crawler) -r, --recursive Follow links on pages (crawler)

@ -289,7 +289,7 @@ function get(url, options = {}) {
}, },
}, },
(response) => { (response) => {
if (response.statusCode >= 400) { if (response.statusCode >= 300) {
return reject( return reject(
new Error(`${response.statusCode} ${response.statusMessage}`) new Error(`${response.statusCode} ${response.statusMessage}`)
) )
@ -337,7 +337,12 @@ class Driver {
this.options.debug = Boolean(+this.options.debug) this.options.debug = Boolean(+this.options.debug)
this.options.recursive = Boolean(+this.options.recursive) this.options.recursive = Boolean(+this.options.recursive)
this.options.probe = Boolean(+this.options.probe) this.options.probe =
String(this.options.probe || '').toLowerCase() === 'basic'
? 'basic'
: String(this.options.probe || '').toLowerCase() === 'full'
? 'full'
: Boolean(+this.options.probe) && 'full'
this.options.delay = parseInt(this.options.delay, 10) this.options.delay = parseInt(this.options.delay, 10)
this.options.maxDepth = parseInt(this.options.maxDepth, 10) this.options.maxDepth = parseInt(this.options.maxDepth, 10)
this.options.maxUrls = parseInt(this.options.maxUrls, 10) this.options.maxUrls = parseInt(this.options.maxUrls, 10)
@ -1153,8 +1158,25 @@ class Site {
} }
async probe(url) { async probe(url) {
const files = { const paths = [
robots: '/robots.txt', {
type: 'robots',
path: '/robots.txt',
},
]
if (this.options.probe === 'full') {
Wappalyzer.technologies
.filter(({ probe }) => Object.keys(probe).length)
.forEach((technology) => {
paths.push(
...Object.keys(technology.probe).map((path) => ({
type: 'probe',
path,
technology,
}))
)
})
} }
// DNS // DNS
@ -1180,9 +1202,7 @@ class Site {
await Promise.allSettled([ await Promise.allSettled([
// Static files // Static files
...Object.keys(files).map(async (file, index) => { ...paths.map(async ({ type, path, technology }, index) => {
const path = files[file]
try { try {
await sleep(this.options.delay * index) await sleep(this.options.delay * index)
@ -1193,7 +1213,17 @@ class Site {
this.log(`Probe ok (${path})`) this.log(`Probe ok (${path})`)
await this.onDetect(url, analyze({ [file]: body.slice(0, 100000) })) const text = body.slice(0, 100000)
await this.onDetect(
url,
analyze(
{
[type]: path ? { [path]: [text] } : text,
},
technology && [technology]
)
)
} catch (error) { } catch (error) {
this.error(`Probe failed (${path}): ${error.message || error}`) this.error(`Probe failed (${path}): ${error.message || error}`)
} }

@ -303,7 +303,7 @@
}, },
"oss": true, "oss": true,
"probe": { "probe": {
"/magento_version": "Magento/([0-9.]+)\\;version:\\1" "/magento_version": ""
}, },
"scriptSrc": [ "scriptSrc": [
"js/mage", "js/mage",

@ -3470,13 +3470,16 @@
"img[src^='/-/media/']", "img[src^='/-/media/']",
"img[src*='/~/media/.+\\.ashx']" "img[src*='/~/media/.+\\.ashx']"
], ],
"probe": {
"/layouts/System/VisitorIdentification.aspx": ""
},
"icon": "Sitecore.svg", "icon": "Sitecore.svg",
"pricing": [ "pricing": [
"poa", "poa",
"recurring", "recurring",
"high" "high"
], ],
"requires": "Microsoft ASP.NET", "implies": "Microsoft ASP.NET",
"saas": true, "saas": true,
"website": "https://www.sitecore.com/" "website": "https://www.sitecore.com/"
}, },

@ -298,19 +298,20 @@ const Wappalyzer = {
const mm = Wappalyzer.analyzeManyToMany const mm = Wappalyzer.analyzeManyToMany
const relations = { const relations = {
url: oo,
xhr: oo,
html: oo,
text: oo,
scripts: oo,
css: oo,
robots: oo,
certIssuer: oo, certIssuer: oo,
scriptSrc: om,
cookies: mm, cookies: mm,
meta: mm, css: oo,
headers: mm,
dns: mm, dns: mm,
headers: mm,
html: oo,
meta: mm,
probe: mm,
robots: oo,
scriptSrc: om,
scripts: oo,
text: oo,
url: oo,
xhr: oo,
} }
try { try {
@ -344,82 +345,77 @@ const Wappalyzer = {
Wappalyzer.technologies = Object.keys(data).reduce((technologies, name) => { Wappalyzer.technologies = Object.keys(data).reduce((technologies, name) => {
const { const {
cats, cats,
certIssuer,
cookies,
cpe,
css,
description, description,
url, dns,
xhr,
dom, dom,
html, excludes,
text,
scripts,
css,
robots,
meta,
headers, headers,
dns, html,
certIssuer, icon,
cookies,
scriptSrc,
js,
implies, implies,
excludes, js,
meta,
pricing,
probe,
requires, requires,
requiresCategory, requiresCategory,
icon, robots,
scriptSrc,
scripts,
text,
url,
website, website,
pricing, xhr,
cpe,
} = data[name] } = data[name]
technologies.push({ technologies.push({
name,
description: description || null,
categories: cats || [], categories: cats || [],
slug: Wappalyzer.slugify(name), certIssuer: transform(certIssuer),
url: transform(url),
xhr: transform(xhr),
headers: transform(headers),
dns: transform(dns),
cookies: transform(cookies), cookies: transform(cookies),
cpe: cpe || null,
css: transform(css),
description: description || null,
dns: transform(dns),
dom: transform( dom: transform(
typeof dom === 'string' || Array.isArray(dom) typeof dom === 'string' || Array.isArray(dom)
? toArray(dom).reduce( ? toArray(dom).reduce(
(dom, selector) => ({ (dom, selector) => ({ ...dom, [selector]: { exists: '' } }),
...dom,
[selector]: { exists: '' },
}),
{} {}
) )
: dom, : dom,
true, true,
false false
), ),
excludes: transform(excludes).map(({ value }) => ({ name: value })),
headers: transform(headers),
html: transform(html), html: transform(html),
text: transform(text), icon: icon || 'default.svg',
scripts: transform(scripts),
css: transform(css),
certIssuer: transform(certIssuer),
robots: transform(robots),
meta: transform(meta),
scriptSrc: transform(scriptSrc),
js: transform(js, true),
implies: transform(implies).map(({ value, confidence, version }) => ({ implies: transform(implies).map(({ value, confidence, version }) => ({
name: value, name: value,
confidence, confidence,
version, version,
})), })),
excludes: transform(excludes).map(({ value }) => ({ js: transform(js, true),
name: value, meta: transform(meta),
})), name,
requires: transform(requires).map(({ value }) => ({ pricing: pricing || [],
name: value, probe: transform(probe, true),
})), requires: transform(requires).map(({ value }) => ({ name: value })),
requiresCategory: transform(requiresCategory).map(({ value }) => ({ requiresCategory: transform(requiresCategory).map(({ value }) => ({
id: value, id: value,
})), })),
icon: icon || 'default.svg', robots: transform(robots),
scriptSrc: transform(scriptSrc),
scripts: transform(scripts),
slug: Wappalyzer.slugify(name),
text: transform(text),
url: transform(url),
website: website || null, website: website || null,
pricing: pricing || [], xhr: transform(xhr),
cpe: cpe || null,
}) })
return technologies return technologies