Fix JS variable processing

main
Elbert Alias 5 years ago
parent dfa36ba048
commit f3c929c18e

@ -1,20 +0,0 @@
/**
 * Simple state holder: keeps the options it was constructed with and exposes
 * a fixed set of fields that all start out as `null`.
 *
 * The class itself never populates these fields; whatever code uses it is
 * expected to fill them in (presumably with data from a page visit — verify
 * against callers).
 */
class Browser {
  /**
   * @param {*} options - Stored as-is on `this.options`; not validated or copied.
   */
  constructor(options) {
    this.options = options;

    // Each field is initialised exactly once. The order below is the
    // property insertion order, so it is kept stable for anything that
    // enumerates the instance's keys.
    this.window = null;
    this.document = null;
    this.statusCode = null;
    this.contentType = null;
    this.headers = null;
    this.html = null;
    this.js = null;
    this.links = null;
    this.scripts = null;
    this.cookies = null;
  }
}
module.exports = Browser;

@ -2,13 +2,15 @@ const { URL } = require('url')
const fs = require('fs') const fs = require('fs')
const path = require('path') const path = require('path')
const LanguageDetect = require('languagedetect') const LanguageDetect = require('languagedetect')
const Wappalyzer = require('./wappalyzer')
const { const {
setTechnologies, setTechnologies,
setCategories, setCategories,
analyze, analyze,
analyzeManyToMany, analyzeManyToMany,
resolve resolve
} = require('./wappalyzer') } = Wappalyzer
const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env
@ -90,54 +92,17 @@ function getJs() {
return dereference(window) return dereference(window)
} }
function processJs(window, patterns) { function analyzeJs(js) {
const js = {} return Array.prototype.concat.apply(
[],
Object.keys(patterns).forEach((appName) => { js.map(({ name, chain, value }) =>
js[appName] = {} analyzeManyToMany(
Wappalyzer.technologies.find(({ name: _name }) => name === _name),
Object.keys(patterns[appName]).forEach((chain) => { 'js',
js[appName][chain] = {} { [chain]: [value] }
)
patterns[appName][chain].forEach((pattern, index) => { )
const properties = chain.split('.') )
let value = properties.reduce(
(parent, property) =>
parent && parent[property] ? parent[property] : null,
window
)
value =
typeof value === 'string' || typeof value === 'number'
? value
: !!value
if (value) {
js[appName][chain][index] = value
}
})
})
})
return js
}
function processHtml(html, maxCols, maxRows) {
if (maxCols || maxRows) {
const batches = []
const rows = html.length / maxCols
for (let i = 0; i < rows; i += 1) {
if (i < maxRows / 2 || i > rows - maxRows / 2) {
batches.push(html.slice(i * maxCols, (i + 1) * maxCols))
}
}
html = batches.join('\n')
}
return html
} }
class Driver { class Driver {
@ -236,8 +201,6 @@ class Site {
this.listeners = {} this.listeners = {}
this.headers = {}
this.pages = [] this.pages = []
} }
@ -322,23 +285,28 @@ class Site {
status: response.status() status: response.status()
} }
const headers = response.headers() const rawHeaders = response.headers()
const headers = {}
Object.keys(headers).forEach((key) => { Object.keys(rawHeaders).forEach((key) => {
this.headers[key] = [ headers[key] = [
...(this.headers[key] || []), ...(headers[key] || []),
...(Array.isArray(headers[key]) ? headers[key] : [headers[key]]) ...(Array.isArray(rawHeaders[key])
? rawHeaders[key]
: [rawHeaders[key]])
] ]
}) })
this.contentType = headers['content-type'] || null this.contentType = headers['content-type'] || null
if (response.status() >= 300 && response.status() < 400) { if (response.status() >= 300 && response.status() < 400) {
if (this.headers.location) { if (headers.location) {
url = new URL(this.headers.location.slice(-1), url) url = new URL(headers.location.slice(-1), url)
} }
} else { } else {
responseReceived = true responseReceived = true
this.onDetect(analyze(url, { headers }))
} }
} }
} catch (error) { } catch (error) {
@ -346,9 +314,10 @@ class Site {
} }
}) })
if (this.options.userAgent) { await page.setUserAgent(
await page.setUserAgent(this.options.userAgent) this.options.userAgent ||
} 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
)
try { try {
await Promise.race([ await Promise.race([
@ -363,6 +332,7 @@ class Site {
await sleep(1000) await sleep(1000)
// Links
const links = await ( const links = await (
await page.evaluateHandle(() => await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('a')).map( Array.from(document.getElementsByTagName('a')).map(
@ -378,7 +348,7 @@ class Site {
) )
).jsonValue() ).jsonValue()
// eslint-disable-next-line no-undef // Script tags
const scripts = ( const scripts = (
await ( await (
await page.evaluateHandle(() => await page.evaluateHandle(() =>
@ -389,9 +359,40 @@ class Site {
).jsonValue() ).jsonValue()
).filter((script) => script) ).filter((script) => script)
// const js = processJs(await page.evaluate(getJs), this.wappalyzer.jsPatterns) // JavaScript
// TODO const win = await page.evaluate(getJs)
const js = Wappalyzer.technologies
.filter(({ js }) => Object.keys(js).length)
.map(({ name, js }) => ({ name, chains: Object.keys(js) }))
.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => {
const value = chain
.split('.')
.reduce(
(value, method) =>
value && value.hasOwnProperty(method)
? value[method]
: undefined,
win
)
if (typeof value !== 'undefined') {
technologies.push({
name,
chain,
value:
typeof value === 'string' || typeof value === 'number'
? value
: !!value
})
}
})
return technologies
}, [])
// Cookies
const cookies = (await page.cookies()).map( const cookies = (await page.cookies()).map(
({ name, value, domain, path }) => ({ ({ name, value, domain, path }) => ({
name, name,
@ -401,11 +402,29 @@ class Site {
}) })
) )
const html = processHtml( // HTML
await page.content(), let html = await page.content()
this.options.htmlMaxCols,
this.options.htmlMaxRows if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
) const batches = []
const rows = html.length / this.options.htmlMaxCols
for (let i = 0; i < rows; i += 1) {
if (
i < this.options.htmlMaxRows / 2 ||
i > rows - this.options.htmlMaxRows / 2
) {
batches.push(
html.slice(
i * this.options.htmlMaxCols,
(i + 1) * this.options.htmlMaxCols
)
)
}
}
html = batches.join('\n')
}
// Validate response // Validate response
if (!this.analyzedUrls[url.href].status) { if (!this.analyzedUrls[url.href].status) {
@ -441,11 +460,12 @@ class Site {
} }
} }
await this.onDetect( this.onDetect(url, analyzeJs(js))
this.onDetect(
url, url,
await analyze(url, { analyze(url, {
cookies, cookies,
headers: this.headers,
html, html,
scripts scripts
}) })

@ -96,7 +96,6 @@ const Content = {
wappalyzer: { wappalyzer: {
technologies: technologies technologies: technologies
.filter(({ js }) => Object.keys(js).length) .filter(({ js }) => Object.keys(js).length)
.filter(({ name }) => name === 'jQuery')
.map(({ name, js }) => ({ name, chains: Object.keys(js) })) .map(({ name, js }) => ({ name, chains: Object.keys(js) }))
} }
}) })

@ -118,13 +118,11 @@ const Driver = {
url, url,
Array.prototype.concat.apply( Array.prototype.concat.apply(
[], [],
await Promise.all( js.map(({ name, chain, value }) =>
js.map(({ name, chain, value }) => analyzeManyToMany(
analyzeManyToMany( Wappalyzer.technologies.find(({ name: _name }) => name === _name),
Wappalyzer.technologies.find(({ name: _name }) => name === _name), 'js',
'js', { [chain]: [value] }
{ [chain]: [value] }
)
) )
) )
) )
@ -178,10 +176,7 @@ const Driver = {
headers['content-type'] && headers['content-type'] &&
/\/x?html/.test(headers['content-type'][0]) /\/x?html/.test(headers['content-type'][0])
) { ) {
await Driver.onDetect( await Driver.onDetect(url, analyze(url.href, { headers }, { tab }))
url,
await analyze(url.href, { headers }, { tab })
)
} }
} }
} catch (error) { } catch (error) {
@ -198,7 +193,7 @@ const Driver = {
domain: `.${url.hostname}` domain: `.${url.hostname}`
}) })
await Driver.onDetect(url, await analyze(href, items), language, true) await Driver.onDetect(url, analyze(href, items), language, true)
} catch (error) { } catch (error) {
Driver.error(error) Driver.error(error)
} }

@ -13,7 +13,7 @@
postMessage({ postMessage({
wappalyzer: { wappalyzer: {
js: technologies.reduce((results, { name, chains }) => { js: technologies.reduce((technologies, { name, chains }) => {
chains.forEach((chain) => { chains.forEach((chain) => {
const value = chain const value = chain
.split('.') .split('.')
@ -25,14 +25,16 @@
window window
) )
technologies.push({ if (value !== undefined) {
name, technologies.push({
chain, name,
value: chain,
typeof value === 'string' || typeof value === 'number' value:
? value typeof value === 'string' || typeof value === 'number'
: !!value ? value
}) : !!value
})
}
}) })
return technologies return technologies

@ -145,7 +145,7 @@ const Wappalyzer = {
} }
}, },
async analyze(url, { html, meta, headers, cookies, scripts }) { analyze(url, { html, meta, headers, cookies, scripts }) {
const oo = Wappalyzer.analyzeOneToOne const oo = Wappalyzer.analyzeOneToOne
const om = Wappalyzer.analyzeOneToMany const om = Wappalyzer.analyzeOneToMany
const mm = Wappalyzer.analyzeManyToMany const mm = Wappalyzer.analyzeManyToMany
@ -154,19 +154,15 @@ const Wappalyzer = {
try { try {
const detections = flatten( const detections = flatten(
flatten( Wappalyzer.technologies.map((technology) =>
await Promise.all( flatten([
Wappalyzer.technologies.map((technology) => oo(technology, 'url', url),
Promise.all([ oo(technology, 'html', html),
oo(technology, 'url', url), om(technology, 'meta', meta),
oo(technology, 'html', html), mm(technology, 'headers', headers),
om(technology, 'meta', meta), om(technology, 'cookies', cookies),
mm(technology, 'headers', headers), om(technology, 'scripts', scripts)
om(technology, 'cookies', cookies), ])
om(technology, 'scripts', scripts)
])
)
)
) )
).filter((technology) => technology) ).filter((technology) => technology)