Implement requires in NPM driver

main
Elbert Alias 4 years ago
parent 739fe4cc8d
commit 07ac98ffce

@ -217,6 +217,19 @@
} }
] ]
}, },
"requires": {
"oneOf": [
{
"type": "array",
"items": {
"$ref": "#/definitions/non-empty-non-blank-string"
}
},
{
"$ref": "#/definitions/non-empty-non-blank-string"
}
]
},
"meta": { "meta": {
"type": "object", "type": "object",
"additionalProperties": false, "additionalProperties": false,

@ -53,7 +53,40 @@ function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms)) return new Promise((resolve) => setTimeout(resolve, ms))
} }
/**
 * Look up, in the page's global scope, every JavaScript property chain that
 * the given technologies declare under their `js` key.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`; presumably a
 *   Puppeteer page (confirm against the caller).
 * @param {Array<object>} [technologies=Wappalyzer.technologies] - Technology
 *   definitions; only entries with a non-empty `js` object are probed.
 * @returns {*} The result of `page.evaluate`. The evaluated callback yields
 *   an array of `{ name, chain, value }`, one per chain that resolved to a
 *   defined value. `value` is kept as-is for strings and numbers and coerced
 *   to a boolean for anything else.
 */
function getJs(page, technologies = Wappalyzer.technologies) {
  // NOTE(review): the callback is handed to `page.evaluate`, so it likely runs
  // in the page context. Keep it self-contained (no outer closures).
  return page.evaluate((technologies) => {
    const toScalar = (value) =>
      typeof value === 'string' || typeof value === 'number' ? value : !!value

    const detections = []

    technologies
      // Skip technologies without `js` instead of throwing in Object.keys().
      .filter(({ js }) => js && Object.keys(js).length)
      .forEach(({ name, js }) => {
        Object.keys(js).forEach((rawChain) => {
          // Normalise bracket access to dot access: `a[b].c` -> `a.b.c`.
          const chain = rawChain.replace(/\[([^\]]+)\]/g, '.$1')

          // Walk the chain from `window`, stopping at the first falsy link.
          const value = chain
            .split('.')
            .reduce(
              (current, key) => (current ? current[key] : undefined),
              window
            )

          if (typeof value !== 'undefined') {
            detections.push({ name, chain, value: toScalar(value) })
          }
        })
      })

    return detections
  }, technologies)
}
async function analyzeJs(js, technologies = Wappalyzer.technologies) {
return Array.prototype.concat.apply( return Array.prototype.concat.apply(
[], [],
await Promise.all( await Promise.all(
@ -61,7 +94,7 @@ async function analyzeJs(js) {
await next() await next()
return analyzeManyToMany( return analyzeManyToMany(
Wappalyzer.technologies.find(({ name: _name }) => name === _name), technologies.find(({ name: _name }) => name === _name),
'js', 'js',
{ [chain]: [value] } { [chain]: [value] }
) )
@ -70,7 +103,92 @@ async function analyzeJs(js) {
) )
} }
/**
 * Collect raw DOM evidence for the given technologies: for each selector a
 * technology declares under `dom`, inspect every matching node for the
 * requested existence, text, property and attribute checks.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`; presumably a
 *   Puppeteer page (confirm against the caller).
 * @param {Array<object>} [technologies=Wappalyzer.technologies] - Technology
 *   definitions; only entries whose `dom` is a plain object are inspected.
 * @returns {*} The result of `page.evaluate`. The evaluated callback yields
 *   an array of records shaped as one of:
 *   `{ name, selector, exists: '' }`, `{ name, selector, text }`,
 *   `{ name, selector, property, value }` or
 *   `{ name, selector, attribute, value }`.
 */
function getDom(page, technologies = Wappalyzer.technologies) {
  // NOTE(review): the callback is handed to `page.evaluate`, so it likely runs
  // in the page context. Keep it self-contained (no outer closures).
  return page.evaluate((technologies) => {
    // Strings and numbers pass through; everything else becomes a boolean.
    const toScalar = (value) =>
      typeof value === 'string' || typeof value === 'number' ? value : !!value

    return technologies
      .filter(({ dom }) => dom && dom.constructor === Object)
      .reduce((detections, { name, dom }) => {
        Object.keys(dom).forEach((selector) => {
          let nodes = []

          try {
            nodes = document.querySelectorAll(selector)
          } catch (error) {
            // Best effort: a selector that cannot be queried (e.g. invalid
            // syntax) is treated as matching nothing.
          }

          if (!nodes.length) {
            return
          }

          dom[selector].forEach(({ exists, text, properties, attributes }) => {
            nodes.forEach((node) => {
              if (exists) {
                detections.push({ name, selector, exists: '' })
              }

              if (text) {
                const value = node.textContent.trim()

                // Nodes with only whitespace produce no text record.
                if (value) {
                  detections.push({ name, selector, text: value })
                }
              }

              if (properties) {
                Object.keys(properties).forEach((property) => {
                  // Only own properties of the node are considered.
                  if (Object.prototype.hasOwnProperty.call(node, property)) {
                    const value = node[property]

                    if (typeof value !== 'undefined') {
                      detections.push({
                        name,
                        selector,
                        property,
                        value: toScalar(value),
                      })
                    }
                  }
                })
              }

              if (attributes) {
                Object.keys(attributes).forEach((attribute) => {
                  if (node.hasAttribute(attribute)) {
                    detections.push({
                      name,
                      selector,
                      attribute,
                      value: toScalar(node.getAttribute(attribute)),
                    })
                  }
                })
              }
            })
          })
        })

        return detections
      }, [])
  }, technologies)
}
async function analyzeDom(dom, technologies = Wappalyzer.technologies) {
return Array.prototype.concat.apply( return Array.prototype.concat.apply(
[], [],
await Promise.all( await Promise.all(
@ -86,7 +204,7 @@ async function analyzeDom(dom) {
}) => { }) => {
await next() await next()
const technology = Wappalyzer.technologies.find( const technology = technologies.find(
({ name: _name }) => name === _name ({ name: _name }) => name === _name
) )
@ -257,6 +375,7 @@ class Site {
} }
this.analyzedUrls = {} this.analyzedUrls = {}
this.analyzedRequires = {}
this.detections = [] this.detections = []
this.listeners = {} this.listeners = {}
@ -266,6 +385,8 @@ class Site {
this.dnsChecked = false this.dnsChecked = false
this.dns = [] this.dns = []
this.cache = {}
this.probed = false this.probed = false
} }
@ -379,7 +500,7 @@ class Site {
setTimeout(async () => { setTimeout(async () => {
xhrDebounce.splice(xhrDebounce.indexOf(hostname), 1) xhrDebounce.splice(xhrDebounce.indexOf(hostname), 1)
this.onDetect(await analyze({ xhr: hostname })) await this.onDetect(url, await analyze({ xhr: hostname }))
}, 1000) }, 1000)
} }
} }
@ -435,7 +556,7 @@ class Site {
? response.securityDetails().issuer() ? response.securityDetails().issuer()
: '' : ''
this.onDetect(await analyze({ headers, certIssuer })) await this.onDetect(url, await analyze({ headers, certIssuer }))
await this.emit('response', { page, response, headers, certIssuer }) await this.emit('response', { page, response, headers, certIssuer })
} }
@ -555,135 +676,10 @@ class Site {
) )
// JavaScript // JavaScript
const js = await this.promiseTimeout( const js = await this.promiseTimeout(getJs(page), [])
page.evaluate(
(technologies) => {
return technologies.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => {
chain = chain.replace(/\[([^\]]+)\]/g, '.$1')
const value = chain
.split('.')
.reduce(
(value, method) => (value ? value[method] : undefined),
window
)
if (typeof value !== 'undefined') {
technologies.push({
name,
chain,
value:
typeof value === 'string' || typeof value === 'number'
? value
: !!value,
})
}
})
return technologies
}, [])
},
Wappalyzer.technologies
.filter(({ js }) => Object.keys(js).length)
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
),
[]
)
// DOM // DOM
const dom = await this.promiseTimeout( const dom = await this.promiseTimeout(getDom(page), [])
page.evaluate(
(technologies) => {
return technologies.reduce((technologies, { name, dom }) => {
const toScalar = (value) =>
typeof value === 'string' || typeof value === 'number'
? value
: !!value
Object.keys(dom).forEach((selector) => {
let nodes = []
try {
nodes = document.querySelectorAll(selector)
} catch (error) {
// Continue
}
if (!nodes.length) {
return
}
dom[selector].forEach(
({ exists, text, properties, attributes }) => {
nodes.forEach((node) => {
if (exists) {
technologies.push({
name,
selector,
exists: '',
})
}
if (text) {
const value = node.textContent.trim()
if (value) {
technologies.push({
name,
selector,
text: value,
})
}
}
if (properties) {
Object.keys(properties).forEach((property) => {
if (
Object.prototype.hasOwnProperty.call(node, property)
) {
const value = node[property]
if (typeof value !== 'undefined') {
technologies.push({
name,
selector,
property,
value: toScalar(value),
})
}
}
})
}
if (attributes) {
Object.keys(attributes).forEach((attribute) => {
if (node.hasAttribute(attribute)) {
const value = node.getAttribute(attribute)
technologies.push({
name,
selector,
attribute,
value: toScalar(value),
})
}
})
}
})
}
)
})
return technologies
}, [])
},
Wappalyzer.technologies
.filter(({ dom }) => dom && dom.constructor === Object)
.map(({ name, dom }) => ({ name, dom }))
),
[]
)
// Cookies // Cookies
const cookies = (await page.cookies()).reduce( const cookies = (await page.cookies()).reduce(
@ -764,7 +760,7 @@ class Site {
return dns return dns
}, {}) }, {})
this.onDetect(await analyze({ dns: this.dns })) await this.onDetect(url, await analyze({ dns: this.dns }))
} }
// Validate response // Validate response
@ -780,17 +776,31 @@ class Site {
throw new Error('No response from server') throw new Error('No response from server')
} }
this.onDetect(await analyzeDom(dom)) this.cache[url.href] = {
this.onDetect(await analyzeJs(js)) page,
this.onDetect( html,
await analyze({ cookies,
url, scripts,
cookies, meta,
html, dns: this.dns,
css, }
scripts,
meta, await this.onDetect(
}) url,
(
await Promise.all([
analyzeDom(dom),
analyzeJs(js),
analyze({
url,
cookies,
html,
css,
scripts,
meta,
}),
])
).flat()
) )
const reducedLinks = Array.prototype.reduce.call( const reducedLinks = Array.prototype.reduce.call(
@ -818,13 +828,8 @@ class Site {
await this.emit('goto', { await this.emit('goto', {
page, page,
url, url,
html,
cookies,
scripts,
meta,
js,
links: reducedLinks, links: reducedLinks,
dns: this.dns, ...this.cache[url.href],
}) })
await page.close() await page.close()
@ -841,7 +846,7 @@ class Site {
throw new Error('Hostname could not be resolved') throw new Error('Hostname could not be resolved')
} }
throw new Error(error.message) throw error
} }
} }
@ -921,7 +926,7 @@ class Site {
this.log(`get ${path}: ok`) this.log(`get ${path}: ok`)
this.onDetect(await analyze({ [file]: body })) await this.onDetect(url, await analyze({ [file]: body }))
} catch (error) { } catch (error) {
this.error(`get ${path}: ${error.message || error}`) this.error(`get ${path}: ${error.message || error}`)
} }
@ -942,16 +947,71 @@ class Site {
await this.batch(links, depth, batch + 1) await this.batch(links, depth, batch + 1)
} }
onDetect(detections = []) { async onDetect(url, detections = []) {
this.detections = this.detections.concat(detections) this.detections = this.detections
.concat(detections)
.filter(
({ technology: { name }, pattern: { regex } }, index, detections) =>
detections.findIndex(
({ technology: { name: _name }, pattern: { regex: _regex } }) =>
name === _name &&
(!regex || regex.toString() === _regex.toString())
) === index
)
if (this.cache[url.href]) {
const resolved = resolve(this.detections)
this.detections.filter( const requires = Wappalyzer.requires
({ technology: { name }, pattern: { regex } }, index) => .filter(({ name, technologies }) =>
this.detections.findIndex( resolved.some(({ name: _name }) => _name === name)
({ technology: { name: _name }, pattern: { regex: _regex } }) => )
name === _name && (!regex || regex.toString() === _regex.toString()) .map(({ technologies }) => technologies)
) === index .flat()
)
await Promise.all(
Object.keys(requires).map(async (name) => {
const technologies = Wappalyzer.requires[name].technologies
this.analyzedRequires[url.href] =
this.analyzedRequires[url.href] || []
if (!this.analyzedRequires[url.href].includes(name)) {
this.analyzedRequires[url.href].push(name)
const { page, cookies, html, css, scripts, meta } =
this.cache[url.href]
const js = await this.promiseTimeout(getJs(page, technologies), [])
const dom = await this.promiseTimeout(
getDom(page, technologies),
[]
)
await this.onDetect(
url,
(
await Promise.all([
analyzeDom(dom, technologies),
analyzeJs(js, technologies),
analyze(
{
url,
cookies,
html,
css,
scripts,
meta,
},
technologies
),
])
).flat()
)
}
})
)
}
} }
async destroy() { async destroy() {