|
|
@ -1,4 +1,3 @@
|
|
|
|
const { URL } = require('url')
|
|
|
|
|
|
|
|
const os = require('os')
|
|
|
|
const os = require('os')
|
|
|
|
const fs = require('fs')
|
|
|
|
const fs = require('fs')
|
|
|
|
const dns = require('dns').promises
|
|
|
|
const dns = require('dns').promises
|
|
|
@ -11,10 +10,6 @@ const Wappalyzer = require('./wappalyzer')
|
|
|
|
const { setTechnologies, setCategories, analyze, analyzeManyToMany, resolve } =
|
|
|
|
const { setTechnologies, setCategories, analyze, analyzeManyToMany, resolve } =
|
|
|
|
Wappalyzer
|
|
|
|
Wappalyzer
|
|
|
|
|
|
|
|
|
|
|
|
function next() {
|
|
|
|
|
|
|
|
return new Promise((resolve) => setImmediate(resolve))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const { CHROMIUM_BIN, CHROMIUM_DATA_DIR, CHROMIUM_WEBSOCKET } = process.env
|
|
|
|
const { CHROMIUM_BIN, CHROMIUM_DATA_DIR, CHROMIUM_WEBSOCKET } = process.env
|
|
|
|
|
|
|
|
|
|
|
|
const chromiumArgs = [
|
|
|
|
const chromiumArgs = [
|
|
|
@ -99,21 +94,16 @@ function getJs(page, technologies = Wappalyzer.technologies) {
|
|
|
|
}, technologies)
|
|
|
|
}, technologies)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async function analyzeJs(js, technologies = Wappalyzer.technologies) {
|
|
|
|
function analyzeJs(js, technologies = Wappalyzer.technologies) {
|
|
|
|
return Array.prototype.concat.apply(
|
|
|
|
return js
|
|
|
|
[],
|
|
|
|
.map(({ name, chain, value }) => {
|
|
|
|
await Promise.all(
|
|
|
|
|
|
|
|
js.map(async ({ name, chain, value }) => {
|
|
|
|
|
|
|
|
await next()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return analyzeManyToMany(
|
|
|
|
return analyzeManyToMany(
|
|
|
|
technologies.find(({ name: _name }) => name === _name),
|
|
|
|
technologies.find(({ name: _name }) => name === _name),
|
|
|
|
'js',
|
|
|
|
'js',
|
|
|
|
{ [chain]: [value] }
|
|
|
|
{ [chain]: [value] }
|
|
|
|
)
|
|
|
|
)
|
|
|
|
})
|
|
|
|
})
|
|
|
|
)
|
|
|
|
.flat()
|
|
|
|
)
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
function getDom(page, technologies = Wappalyzer.technologies) {
|
|
|
|
function getDom(page, technologies = Wappalyzer.technologies) {
|
|
|
@ -201,25 +191,10 @@ function getDom(page, technologies = Wappalyzer.technologies) {
|
|
|
|
}, technologies)
|
|
|
|
}, technologies)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async function analyzeDom(dom, technologies = Wappalyzer.technologies) {
|
|
|
|
function analyzeDom(dom, technologies = Wappalyzer.technologies) {
|
|
|
|
return Array.prototype.concat.apply(
|
|
|
|
return dom
|
|
|
|
[],
|
|
|
|
.map(({ name, selector, exists, text, property, attribute, value }) => {
|
|
|
|
await Promise.all(
|
|
|
|
const technology = technologies.find(({ name: _name }) => name === _name)
|
|
|
|
dom.map(
|
|
|
|
|
|
|
|
async ({
|
|
|
|
|
|
|
|
name,
|
|
|
|
|
|
|
|
selector,
|
|
|
|
|
|
|
|
exists,
|
|
|
|
|
|
|
|
text,
|
|
|
|
|
|
|
|
property,
|
|
|
|
|
|
|
|
attribute,
|
|
|
|
|
|
|
|
value,
|
|
|
|
|
|
|
|
}) => {
|
|
|
|
|
|
|
|
await next()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const technology = technologies.find(
|
|
|
|
|
|
|
|
({ name: _name }) => name === _name
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (typeof exists !== 'undefined') {
|
|
|
|
if (typeof exists !== 'undefined') {
|
|
|
|
return analyzeManyToMany(technology, 'dom.exists', {
|
|
|
|
return analyzeManyToMany(technology, 'dom.exists', {
|
|
|
@ -240,20 +215,12 @@ async function analyzeDom(dom, technologies = Wappalyzer.technologies) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (typeof attribute !== 'undefined') {
|
|
|
|
if (typeof attribute !== 'undefined') {
|
|
|
|
return analyzeManyToMany(
|
|
|
|
return analyzeManyToMany(technology, `dom.attributes.${attribute}`, {
|
|
|
|
technology,
|
|
|
|
|
|
|
|
`dom.attributes.${attribute}`,
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
[selector]: [value],
|
|
|
|
[selector]: [value],
|
|
|
|
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
.flat()
|
|
|
|
|
|
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
function get(url, options = {}) {
|
|
|
|
function get(url, options = {}) {
|
|
|
@ -497,11 +464,8 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
async goto(url) {
|
|
|
|
async goto(url) {
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
// Return when the URL is a duplicate or maxUrls has been reached
|
|
|
|
if (
|
|
|
|
if (this.analyzedUrls[url.href]) {
|
|
|
|
this.analyzedUrls[url.href] ||
|
|
|
|
return []
|
|
|
|
Object.keys(this.analyzedUrls).length >= this.options.maxUrls
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Navigate to ${url}`, 'page')
|
|
|
|
this.log(`Navigate to ${url}`, 'page')
|
|
|
@ -556,7 +520,7 @@ class Site {
|
|
|
|
if (!this.analyzedXhr[url.hostname].includes(hostname)) {
|
|
|
|
if (!this.analyzedXhr[url.hostname].includes(hostname)) {
|
|
|
|
this.analyzedXhr[url.hostname].push(hostname)
|
|
|
|
this.analyzedXhr[url.hostname].push(hostname)
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(url, await analyze({ xhr: hostname }))
|
|
|
|
await this.onDetect(url, analyze({ xhr: hostname }))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}, 1000)
|
|
|
|
}, 1000)
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -588,12 +552,13 @@ class Site {
|
|
|
|
page.on('response', async (response) => {
|
|
|
|
page.on('response', async (response) => {
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
if (
|
|
|
|
if (
|
|
|
|
|
|
|
|
response.status < 300 &&
|
|
|
|
response.frame().url() === url.href &&
|
|
|
|
response.frame().url() === url.href &&
|
|
|
|
response.request().resourceType() === 'script'
|
|
|
|
response.request().resourceType() === 'script'
|
|
|
|
) {
|
|
|
|
) {
|
|
|
|
const scripts = await response.text()
|
|
|
|
const scripts = await response.text()
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(response.url(), await analyze({ scripts }))
|
|
|
|
await this.onDetect(response.url(), analyze({ scripts }))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (response.url() === url.href) {
|
|
|
|
if (response.url() === url.href) {
|
|
|
@ -613,22 +578,34 @@ class Site {
|
|
|
|
]
|
|
|
|
]
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Prevent cross-domain redirects
|
|
|
|
if (response.status() >= 300 && response.status() < 400) {
|
|
|
|
if (response.status() >= 300 && response.status() < 400) {
|
|
|
|
if (headers.location) {
|
|
|
|
if (headers.location) {
|
|
|
|
url = new URL(headers.location.slice(-1), url)
|
|
|
|
const _url = new URL(headers.location.slice(-1), url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
|
|
|
_url.hostname.replace(/^www\./, '') ===
|
|
|
|
|
|
|
|
this.originalUrl.hostname.replace(/^www\./, '') ||
|
|
|
|
|
|
|
|
(Object.keys(this.analyzedUrls).length === 1 &&
|
|
|
|
|
|
|
|
!this.options.noRedirect)
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
url = _url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
responseReceived = true
|
|
|
|
responseReceived = true
|
|
|
|
|
|
|
|
|
|
|
|
const certIssuer = response.securityDetails()
|
|
|
|
const certIssuer = response.securityDetails()
|
|
|
|
? response.securityDetails().issuer()
|
|
|
|
? response.securityDetails().issuer()
|
|
|
|
: ''
|
|
|
|
: ''
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(url, await analyze({ headers, certIssuer }))
|
|
|
|
await this.onDetect(url, analyze({ headers, certIssuer }))
|
|
|
|
|
|
|
|
|
|
|
|
await this.emit('response', { page, response, headers, certIssuer })
|
|
|
|
await this.emit('response', { page, response, headers, certIssuer })
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
this.error(error)
|
|
|
|
this.error(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -727,9 +704,10 @@ class Site {
|
|
|
|
text = await this.promiseTimeout(
|
|
|
|
text = await this.promiseTimeout(
|
|
|
|
(
|
|
|
|
(
|
|
|
|
await this.promiseTimeout(
|
|
|
|
await this.promiseTimeout(
|
|
|
|
page.evaluateHandle(() =>
|
|
|
|
page.evaluateHandle(
|
|
|
|
|
|
|
|
() =>
|
|
|
|
// eslint-disable-next-line unicorn/prefer-text-content
|
|
|
|
// eslint-disable-next-line unicorn/prefer-text-content
|
|
|
|
document.body.innerText.replace(/\s+/g, ' ').slice(0, 25000)
|
|
|
|
document.body.innerText // .replace(/\s+/g, ' ').slice(0, 25000)
|
|
|
|
),
|
|
|
|
),
|
|
|
|
{ jsonValue: () => '' },
|
|
|
|
{ jsonValue: () => '' },
|
|
|
|
'Timeout (text)'
|
|
|
|
'Timeout (text)'
|
|
|
@ -814,7 +792,11 @@ class Site {
|
|
|
|
meta.getAttribute('name') || meta.getAttribute('property')
|
|
|
|
meta.getAttribute('name') || meta.getAttribute('property')
|
|
|
|
|
|
|
|
|
|
|
|
if (key) {
|
|
|
|
if (key) {
|
|
|
|
metas[key.toLowerCase()] = [meta.getAttribute('content')]
|
|
|
|
metas[key.toLowerCase()] = metas[key.toLowerCase()] || []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metas[key.toLowerCase()].push(
|
|
|
|
|
|
|
|
meta.getAttribute('content')
|
|
|
|
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return metas
|
|
|
|
return metas
|
|
|
@ -851,8 +833,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(
|
|
|
|
await this.onDetect(
|
|
|
|
url,
|
|
|
|
url,
|
|
|
|
(
|
|
|
|
[
|
|
|
|
await Promise.all([
|
|
|
|
|
|
|
|
analyzeDom(dom),
|
|
|
|
analyzeDom(dom),
|
|
|
|
analyzeJs(js),
|
|
|
|
analyzeJs(js),
|
|
|
|
analyze({
|
|
|
|
analyze({
|
|
|
@ -865,8 +846,7 @@ class Site {
|
|
|
|
scriptSrc,
|
|
|
|
scriptSrc,
|
|
|
|
meta,
|
|
|
|
meta,
|
|
|
|
}),
|
|
|
|
}),
|
|
|
|
])
|
|
|
|
].flat()
|
|
|
|
).flat()
|
|
|
|
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
const reducedLinks = Array.prototype.reduce.call(
|
|
|
|
const reducedLinks = Array.prototype.reduce.call(
|
|
|
@ -957,14 +937,23 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
await Promise.all([
|
|
|
|
await Promise.all([
|
|
|
|
(async () => {
|
|
|
|
(async () => {
|
|
|
|
const links = await this.goto(url)
|
|
|
|
const links = ((await this.goto(url)) || []).filter(
|
|
|
|
|
|
|
|
({ href }) => !this.analyzedUrls[href]
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
if (
|
|
|
|
links &&
|
|
|
|
links.length &&
|
|
|
|
this.options.recursive &&
|
|
|
|
this.options.recursive &&
|
|
|
|
|
|
|
|
Object.keys(this.analyzedUrls).length < this.options.maxUrls &&
|
|
|
|
depth < this.options.maxDepth
|
|
|
|
depth < this.options.maxDepth
|
|
|
|
) {
|
|
|
|
) {
|
|
|
|
await this.batch(links.slice(0, this.options.maxUrls), depth + 1)
|
|
|
|
await this.batch(
|
|
|
|
|
|
|
|
links.slice(
|
|
|
|
|
|
|
|
0,
|
|
|
|
|
|
|
|
this.options.maxUrls - Object.keys(this.analyzedUrls).length
|
|
|
|
|
|
|
|
),
|
|
|
|
|
|
|
|
depth + 1
|
|
|
|
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})(),
|
|
|
|
})(),
|
|
|
|
(async () => {
|
|
|
|
(async () => {
|
|
|
@ -977,7 +966,7 @@ class Site {
|
|
|
|
])
|
|
|
|
])
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
this.analyzedUrls[url.href] = {
|
|
|
|
status: 0,
|
|
|
|
status: this.analyzedUrls[url.href]?.status || 0,
|
|
|
|
error: error.message || error.toString(),
|
|
|
|
error: error.message || error.toString(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -1058,10 +1047,7 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
this.log(`Probe ok (${path})`)
|
|
|
|
this.log(`Probe ok (${path})`)
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(
|
|
|
|
await this.onDetect(url, analyze({ [file]: body.slice(0, 100000) }))
|
|
|
|
url,
|
|
|
|
|
|
|
|
await analyze({ [file]: body.slice(0, 100000) })
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
this.error(`Probe failed (${path}): ${error.message || error}`)
|
|
|
|
this.error(`Probe failed (${path}): ${error.message || error}`)
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -1099,7 +1085,7 @@ class Site {
|
|
|
|
`Probe DNS ok: (${Object.values(dnsRecords).flat().length} records)`
|
|
|
|
`Probe DNS ok: (${Object.values(dnsRecords).flat().length} records)`
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(url, await analyze({ dns: dnsRecords }))
|
|
|
|
await this.onDetect(url, analyze({ dns: dnsRecords }))
|
|
|
|
|
|
|
|
|
|
|
|
resolve()
|
|
|
|
resolve()
|
|
|
|
}),
|
|
|
|
}),
|
|
|
@ -1174,11 +1160,10 @@ class Site {
|
|
|
|
|
|
|
|
|
|
|
|
await this.onDetect(
|
|
|
|
await this.onDetect(
|
|
|
|
url,
|
|
|
|
url,
|
|
|
|
(
|
|
|
|
[
|
|
|
|
await Promise.all([
|
|
|
|
|
|
|
|
analyzeDom(dom, technologies),
|
|
|
|
analyzeDom(dom, technologies),
|
|
|
|
analyzeJs(js, technologies),
|
|
|
|
analyzeJs(js, technologies),
|
|
|
|
analyze(
|
|
|
|
await analyze(
|
|
|
|
{
|
|
|
|
{
|
|
|
|
url,
|
|
|
|
url,
|
|
|
|
cookies,
|
|
|
|
cookies,
|
|
|
@ -1191,8 +1176,7 @@ class Site {
|
|
|
|
},
|
|
|
|
},
|
|
|
|
technologies
|
|
|
|
technologies
|
|
|
|
),
|
|
|
|
),
|
|
|
|
])
|
|
|
|
].flat()
|
|
|
|
).flat()
|
|
|
|
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|