Fix JS variable processing

main
Elbert Alias 5 years ago
parent dfa36ba048
commit f3c929c18e

@ -1,20 +0,0 @@
/**
 * Plain state holder for a browser session.
 *
 * The constructor only stores the supplied options and initialises every
 * other field to `null`; nothing in this class populates them.
 * NOTE(review): presumably subclasses or callers fill these fields in after
 * a page load — confirm against the code that uses this class.
 */
class Browser {
  /**
   * @param {*} options - Stored as-is on `this.options`; not inspected here.
   */
  constructor(options) {
    this.options = options;

    // Each field is initialised exactly once. The original assigned
    // statusCode and contentType twice, which was redundant.
    this.window = null;
    this.document = null;
    this.statusCode = null;
    this.contentType = null;
    this.headers = null;
    this.html = null;
    this.js = null;
    this.links = null;
    this.scripts = null;
    this.cookies = null;
  }
}
module.exports = Browser;

@ -2,13 +2,15 @@ const { URL } = require('url')
const fs = require('fs')
const path = require('path')
const LanguageDetect = require('languagedetect')
const Wappalyzer = require('./wappalyzer')
const {
setTechnologies,
setCategories,
analyze,
analyzeManyToMany,
resolve
} = require('./wappalyzer')
} = Wappalyzer
const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env
@ -90,54 +92,17 @@ function getJs() {
return dereference(window)
}
function processJs(window, patterns) {
const js = {}
Object.keys(patterns).forEach((appName) => {
js[appName] = {}
Object.keys(patterns[appName]).forEach((chain) => {
js[appName][chain] = {}
patterns[appName][chain].forEach((pattern, index) => {
const properties = chain.split('.')
let value = properties.reduce(
(parent, property) =>
parent && parent[property] ? parent[property] : null,
window
function analyzeJs(js) {
return Array.prototype.concat.apply(
[],
js.map(({ name, chain, value }) =>
analyzeManyToMany(
Wappalyzer.technologies.find(({ name: _name }) => name === _name),
'js',
{ [chain]: [value] }
)
)
)
value =
typeof value === 'string' || typeof value === 'number'
? value
: !!value
if (value) {
js[appName][chain][index] = value
}
})
})
})
return js
}
/**
 * Optionally shorten an HTML string by keeping only its leading and
 * trailing "rows".
 *
 * The string is cut into consecutive rows of `maxCols` characters. A row
 * with index `i` is kept when `i < maxRows / 2` or
 * `i > rows - maxRows / 2`, where `rows = html.length / maxCols`. Kept rows
 * are joined with a newline.
 *
 * @param {string} html - The markup to shorten.
 * @param {number} [maxCols] - Characters per row.
 * @param {number} [maxRows] - Row budget; half is taken from each end.
 * @returns {string} The shortened markup, or `html` unchanged when either
 *   limit is missing or zero.
 */
function processHtml(html, maxCols, maxRows) {
  // Both limits are required: with only one of them the arithmetic below
  // produces NaN, no row matches, and the whole document would be replaced
  // by an empty string.
  if (!maxCols || !maxRows) {
    return html
  }

  const batches = []
  const rows = html.length / maxCols

  for (let i = 0; i < rows; i += 1) {
    if (i < maxRows / 2 || i > rows - maxRows / 2) {
      batches.push(html.slice(i * maxCols, (i + 1) * maxCols))
    }
  }

  return batches.join('\n')
}
class Driver {
@ -236,8 +201,6 @@ class Site {
this.listeners = {}
this.headers = {}
this.pages = []
}
@ -322,23 +285,28 @@ class Site {
status: response.status()
}
const headers = response.headers()
const rawHeaders = response.headers()
const headers = {}
Object.keys(headers).forEach((key) => {
this.headers[key] = [
...(this.headers[key] || []),
...(Array.isArray(headers[key]) ? headers[key] : [headers[key]])
Object.keys(rawHeaders).forEach((key) => {
headers[key] = [
...(headers[key] || []),
...(Array.isArray(rawHeaders[key])
? rawHeaders[key]
: [rawHeaders[key]])
]
})
this.contentType = headers['content-type'] || null
if (response.status() >= 300 && response.status() < 400) {
if (this.headers.location) {
url = new URL(this.headers.location.slice(-1), url)
if (headers.location) {
url = new URL(headers.location.slice(-1), url)
}
} else {
responseReceived = true
this.onDetect(analyze(url, { headers }))
}
}
} catch (error) {
@ -346,9 +314,10 @@ class Site {
}
})
if (this.options.userAgent) {
await page.setUserAgent(this.options.userAgent)
}
await page.setUserAgent(
this.options.userAgent ||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
)
try {
await Promise.race([
@ -363,6 +332,7 @@ class Site {
await sleep(1000)
// Links
const links = await (
await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('a')).map(
@ -378,7 +348,7 @@ class Site {
)
).jsonValue()
// eslint-disable-next-line no-undef
// Script tags
const scripts = (
await (
await page.evaluateHandle(() =>
@ -389,9 +359,40 @@ class Site {
).jsonValue()
).filter((script) => script)
// const js = processJs(await page.evaluate(getJs), this.wappalyzer.jsPatterns)
// TODO
// JavaScript
const win = await page.evaluate(getJs)
const js = Wappalyzer.technologies
.filter(({ js }) => Object.keys(js).length)
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => {
const value = chain
.split('.')
.reduce(
(value, method) =>
value && value.hasOwnProperty(method)
? value[method]
: undefined,
win
)
if (typeof value !== 'undefined') {
technologies.push({
name,
chain,
value:
typeof value === 'string' || typeof value === 'number'
? value
: !!value
})
}
})
return technologies
}, [])
// Cookies
const cookies = (await page.cookies()).map(
({ name, value, domain, path }) => ({
name,
@ -401,11 +402,29 @@ class Site {
})
)
const html = processHtml(
await page.content(),
this.options.htmlMaxCols,
this.options.htmlMaxRows
// HTML
let html = await page.content()
if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
const batches = []
const rows = html.length / this.options.htmlMaxCols
for (let i = 0; i < rows; i += 1) {
if (
i < this.options.htmlMaxRows / 2 ||
i > rows - this.options.htmlMaxRows / 2
) {
batches.push(
html.slice(
i * this.options.htmlMaxCols,
(i + 1) * this.options.htmlMaxCols
)
)
}
}
html = batches.join('\n')
}
// Validate response
if (!this.analyzedUrls[url.href].status) {
@ -441,11 +460,12 @@ class Site {
}
}
await this.onDetect(
this.onDetect(url, analyzeJs(js))
this.onDetect(
url,
await analyze(url, {
analyze(url, {
cookies,
headers: this.headers,
html,
scripts
})

@ -96,7 +96,6 @@ const Content = {
wappalyzer: {
technologies: technologies
.filter(({ js }) => Object.keys(js).length)
.filter(({ name }) => name === 'jQuery')
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
}
})

@ -118,7 +118,6 @@ const Driver = {
url,
Array.prototype.concat.apply(
[],
await Promise.all(
js.map(({ name, chain, value }) =>
analyzeManyToMany(
Wappalyzer.technologies.find(({ name: _name }) => name === _name),
@ -128,7 +127,6 @@ const Driver = {
)
)
)
)
},
onRuntimeConnect(port) {
@ -178,10 +176,7 @@ const Driver = {
headers['content-type'] &&
/\/x?html/.test(headers['content-type'][0])
) {
await Driver.onDetect(
url,
await analyze(url.href, { headers }, { tab })
)
await Driver.onDetect(url, analyze(url.href, { headers }, { tab }))
}
}
} catch (error) {
@ -198,7 +193,7 @@ const Driver = {
domain: `.${url.hostname}`
})
await Driver.onDetect(url, await analyze(href, items), language, true)
await Driver.onDetect(url, analyze(href, items), language, true)
} catch (error) {
Driver.error(error)
}

@ -13,7 +13,7 @@
postMessage({
wappalyzer: {
js: technologies.reduce((results, { name, chains }) => {
js: technologies.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => {
const value = chain
.split('.')
@ -25,6 +25,7 @@
window
)
if (value !== undefined) {
technologies.push({
name,
chain,
@ -33,6 +34,7 @@
? value
: !!value
})
}
})
return technologies

@ -145,7 +145,7 @@ const Wappalyzer = {
}
},
async analyze(url, { html, meta, headers, cookies, scripts }) {
analyze(url, { html, meta, headers, cookies, scripts }) {
const oo = Wappalyzer.analyzeOneToOne
const om = Wappalyzer.analyzeOneToMany
const mm = Wappalyzer.analyzeManyToMany
@ -154,10 +154,8 @@ const Wappalyzer = {
try {
const detections = flatten(
flatten(
await Promise.all(
Wappalyzer.technologies.map((technology) =>
Promise.all([
flatten([
oo(technology, 'url', url),
oo(technology, 'html', html),
om(technology, 'meta', meta),
@ -166,8 +164,6 @@ const Wappalyzer = {
om(technology, 'scripts', scripts)
])
)
)
)
).filter((technology) => technology)
return detections