You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

541 lines
12 KiB

const fs = require('fs')
const path = require('path')
const { URL } = require('url')
const LanguageDetect = require('languagedetect')
const Wappalyzer = require('./wappalyzer')
const { AWS_LAMBDA_FUNCTION_NAME } = process.env

// Choose the Puppeteer implementation once, at module load: when the
// AWS_LAMBDA_FUNCTION_NAME environment variable is set, use the instance
// exposed by chrome-aws-lambda; otherwise use the regular puppeteer package.
let puppeteer

if (!AWS_LAMBDA_FUNCTION_NAME) {
  // eslint-disable-next-line global-require
  puppeteer = require('puppeteer')
} else {
  // eslint-disable-next-line global-require, import/no-unresolved
  puppeteer = require('chrome-aws-lambda').chromium.puppeteer
}
// Shared language detector used by Site#goto on the page's text.
// NOTE(review): 'iso2' presumably makes detect() report two-letter ISO
// language codes — confirm against the languagedetect documentation.
const languageDetect = new LanguageDetect()
languageDetect.setLanguageType('iso2')
// Technology and category definitions (`apps` and `categories` are read from
// it below), parsed once at module load. The file is resolved against this
// module's directory instead of the process working directory, so loading the
// driver no longer depends on where the process was started.
// NOTE(review): assumes apps.json ships next to this module — confirm.
const json = JSON.parse(fs.readFileSync(path.join(__dirname, 'apps.json')))
// Pathnames the crawler is allowed to follow: either no dot at all (no file
// extension) or ending in one of the listed page extensions. The extension
// alternative is anchored only at the end of the string; anchoring it at the
// start as well would require the whole pathname to be e.g. ".html", which
// never happens for a real link.
const extensions = /(^[^.]+|\.(asp|aspx|cgi|htm|html|jsp|php))$/
// Human-readable messages keyed by error code. Site#analyze looks up the
// message of a caught error in this table: a match is reported under that
// code, anything else is reported as UNKNOWN_ERROR.
const errorTypes = {
RESPONSE_NOT_OK: 'Response was not ok',
NO_RESPONSE: 'No response from server',
NO_HTML_DOCUMENT: 'No HTML document'
}
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise(function wait(done) {
    setTimeout(done, ms)
  })
}
/**
 * Build a plain-data snapshot of the global `window` object.
 *
 * The function is handed to `page.evaluate`, so it must stay self-contained:
 * everything it needs is declared inside its own body.
 *
 * Rules applied while walking the object graph:
 *  - anything nested deeper than five levels, and any nested reference back
 *    to `window`, is replaced by the string '[Removed]';
 *  - objects, functions and arrays become plain objects holding a snapshot of
 *    each own enumerable property (array indexes become keys);
 *  - primitives and null are kept as they are;
 *  - a value whose inspection throws is replaced by undefined.
 *
 * @returns {Object} Snapshot of `window`.
 */
function getJs() {
  const MAX_LEVEL = 5
  const PLACEHOLDER = '[Removed]'

  function snapshot(value, level = 0) {
    try {
      // eslint-disable-next-line no-undef
      if (level > MAX_LEVEL || (level && value === window)) {
        return PLACEHOLDER
      }

      let current = value

      // Array items are snapshotted first; the resulting array is then
      // converted to a plain object by the container branch below
      if (Array.isArray(current)) {
        current = current.map((entry) => snapshot(entry, level + 1))
      }

      const isContainer =
        typeof current === 'function' ||
        (typeof current === 'object' && current !== null)

      if (!isContainer) {
        return current
      }

      const copy = {}

      for (const key of Object.keys(current)) {
        copy[key] = snapshot(current[key], level + 1)
      }

      return copy
    } catch (error) {
      return undefined
    }
  }

  // eslint-disable-next-line no-undef
  return snapshot(window)
}
/**
 * Look up every JavaScript property chain named in `patterns` on a window
 * snapshot and report what was found.
 *
 * @param {Object} window - Object to resolve the chains against (e.g. the
 *   value produced by getJs).
 * @param {Object} patterns - Map of application name to a map of
 *   dot-separated property chains (such as `jQuery.fn.jquery`) to the list of
 *   patterns registered for that chain.
 * @returns {Object} `js[appName][chain][index]` holds the resolved value for
 *   every pattern index of a chain that resolved to something truthy. Strings
 *   and numbers are reported as-is, every other truthy value as `true`.
 *   Chains that resolve to nothing keep an empty object.
 */
function processJs(window, patterns) {
  const js = {}

  Object.keys(patterns).forEach((appName) => {
    js[appName] = {}

    Object.keys(patterns[appName]).forEach((chain) => {
      const matches = {}
      const chainPatterns = patterns[appName][chain]

      js[appName][chain] = matches

      if (!chainPatterns.length) {
        return
      }

      // The chain resolves to the same value for every pattern registered on
      // it, so walk the object graph once per chain rather than per pattern
      const resolved = chain
        .split('.')
        .reduce(
          (parent, property) =>
            parent && parent[property] ? parent[property] : null,
          window
        )

      const value =
        typeof resolved === 'string' || typeof resolved === 'number'
          ? resolved
          : !!resolved

      if (!value) {
        return
      }

      chainPatterns.forEach((_pattern, index) => {
        matches[index] = value
      })
    })
  })

  return js
}
/**
 * Shrink a large HTML document before it is analysed.
 *
 * The markup is cut into rows of `maxCols` characters; only rows near the
 * start and near the end of the document are kept (about `maxRows / 2` on
 * each side) and the kept rows are joined with newlines.
 *
 * Truncation needs both limits. When either one is missing, zero, negative
 * or NaN the markup is returned untouched: without a positive column width
 * the row count is infinite or NaN, and without a positive row count every
 * row would be discarded.
 *
 * @param {string} html - Markup to shrink.
 * @param {number} maxCols - Characters per row.
 * @param {number} maxRows - Approximate number of rows to keep in total.
 * @returns {string} The shrunk markup, or `html` itself when no valid limits
 *   were given.
 */
function processHtml(html, maxCols, maxRows) {
  if (!(maxCols > 0) || !(maxRows > 0)) {
    return html
  }

  const rows = html.length / maxCols
  const halfRows = maxRows / 2
  const kept = []

  for (let i = 0; i < rows; i += 1) {
    if (i < halfRows || i > rows - halfRows) {
      kept.push(html.slice(i * maxCols, (i + 1) * maxCols))
    }
  }

  return kept.join('\n')
}
/**
 * Owns the headless browser and the crawl settings shared by every Site
 * opened through it.
 */
class Driver {
  /**
   * @param {Object} [options] - Overrides for the defaults listed below.
   *   Numeric settings are passed through parseInt; `debug` and `recursive`
   *   are coerced with `Boolean(+value)`, so 1 / '1' / true enable them.
   *   `delay` is forced to 0 unless `recursive` is enabled.
   */
  constructor(options = {}) {
    this.options = {
      batchSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      ...options
    }

    this.options.debug = Boolean(+this.options.debug)
    this.options.recursive = Boolean(+this.options.recursive)
    this.options.delay = this.options.recursive
      ? parseInt(this.options.delay, 10)
      : 0
    this.options.maxDepth = parseInt(this.options.maxDepth, 10)
    this.options.maxUrls = parseInt(this.options.maxUrls, 10)
    this.options.maxWait = parseInt(this.options.maxWait, 10)
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10)
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10)

    this.destroyed = false
  }

  /**
   * Launch the browser and arrange for it to be relaunched if it disconnects
   * before destroy() has been called.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the browser cannot be launched.
   */
  async init() {
    this.log('Launching browser...')

    try {
      this.browser = await puppeteer.launch({
        args: [
          '--no-sandbox',
          '--headless',
          '--disable-gpu',
          '--ignore-certificate-errors'
        ]
      })

      this.browser.on('disconnected', async () => {
        this.log('Browser disconnected')

        if (this.destroyed) {
          return
        }

        // Nothing awaits an event handler: a relaunch failure that escaped
        // from here would surface as an unhandled promise rejection
        try {
          await this.init()
        } catch (error) {
          this.log(
            `Failed to relaunch browser: ${error.message || error}`,
            'driver',
            'error'
          )
        }
      })
    } catch (error) {
      throw new Error(error.toString())
    }
  }

  /**
   * Mark the driver as destroyed (which disables relaunching) and close the
   * browser if one was launched.
   *
   * @returns {Promise<void>}
   * @throws {Error} when closing the browser fails.
   */
  async destroy() {
    this.destroyed = true

    if (this.browser) {
      try {
        // NOTE(review): the reason for this 1 ms pause is not evident from
        // this file; kept as-is
        await sleep(1)
        await this.browser.close()

        this.log('Done')
      } catch (error) {
        throw new Error(error.toString())
      }
    }
  }

  /**
   * Create a Site bound to this driver.
   *
   * @param {string} url - Address to analyse.
   * @returns {Site}
   */
  open(url) {
    return new Site(url, this)
  }

  /**
   * Print a message to the console when the `debug` option is enabled.
   *
   * @param {string} message
   * @param {string} [source='driver'] - Component the message comes from.
   * @param {string} [type='debug'] - Severity label, printed in upper case.
   */
  log(message, source = 'driver', type = 'debug') {
    if (this.options.debug) {
      // eslint-disable-next-line no-console
      console.log(`${type.toUpperCase()} | ${source} | ${message}`)
    }
  }
}
/**
 * One analysis session for a website: navigates pages with the driver's
 * browser, feeds what it collects to a Wappalyzer instance and accumulates
 * the detected technologies.
 *
 * Events delivered through on(): 'log' (message), 'error' (Error) and
 * 'goto' (URL of the page that was analysed).
 */
class Site {
  /**
   * @param {string} url - Address to analyse.
   * @param {Driver} driver - Driver providing `options`, `browser` and `log`.
   * @throws {Error} when `url` cannot be parsed.
   */
  constructor(url, driver) {
    ;({ options: this.options, browser: this.browser } = driver)

    this.driver = driver

    try {
      this.originalUrl = new URL(url)
    } catch (error) {
      throw new Error(error.message || error.toString())
    }

    this.wappalyzer = new Wappalyzer()

    this.wappalyzer.apps = json.apps
    this.wappalyzer.categories = json.categories

    this.wappalyzer.parseJsPatterns()

    // Route Wappalyzer's logging and results back into this site
    this.wappalyzer.driver.log = (message, source, type) =>
      this.log(message, source, type)
    this.wappalyzer.driver.displayApps = (detected, meta, context) =>
      this.displayApps(detected, meta, context)

    this.analyzedUrls = {}
    this.technologies = []
    this.meta = {}
    this.listeners = {}
    // Response headers accumulated over every page analysed for this site
    this.headers = {}
  }

  /** Placeholder; has no behaviour in this file. */
  async init() {}

  /**
   * Register a listener for an event.
   *
   * @param {string} event
   * @param {Function} callback - Called with the single value passed to emit().
   */
  on(event, callback) {
    if (!this.listeners[event]) {
      this.listeners[event] = []
    }

    this.listeners[event].push(callback)
  }

  /**
   * Call every listener registered for `event` with `params`.
   *
   * @param {string} event
   * @param {*} params
   */
  emit(event, params) {
    if (this.listeners[event]) {
      this.listeners[event].forEach((listener) => listener(params))
    }
  }

  /**
   * Emit a 'log' event and forward the arguments to the driver's logger.
   * Listeners only receive the first argument (the message), because emit()
   * passes a single value.
   */
  log(...args) {
    this.emit('log', ...args)

    this.driver.log(...args)
  }

  /** Placeholder; has no behaviour in this file. */
  async fetch(url, index, depth) {}

  /**
   * Load one page, analyse it and collect the links worth following.
   *
   * @param {URL} url - Page to visit.
   * @returns {Promise<URL[]|undefined>} Same-host http(s) links without
   *   `rel="nofollow"` whose pathname passes the `extensions` filter, with
   *   fragments removed; `undefined` when the URL was already visited or the
   *   `maxUrls` limit has been reached.
   * @throws {Error} 'Browser closed' when no browser is available,
   *   'NO_RESPONSE' when no response was recorded for the page, or whatever
   *   the browser raises while the page is inspected.
   */
  async goto(url) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if (
      this.analyzedUrls[url.href] ||
      Object.keys(this.analyzedUrls).length >= this.options.maxUrls
    ) {
      return
    }

    this.log(`Navigate to ${url}`, 'page')

    this.analyzedUrls[url.href] = {
      status: 0
    }

    // Driver#init replaces driver.browser when it relaunches after a
    // disconnect, so prefer the driver's current browser over the reference
    // captured in the constructor
    this.browser = this.driver.browser || this.browser

    if (!this.browser) {
      throw new Error('Browser closed')
    }

    const page = await this.browser.newPage()

    try {
      // Tracks the address currently being loaded; updated on redirects
      let target = url
      let responseReceived = false

      page.setDefaultTimeout(this.options.maxWait)

      await page.setRequestInterception(true)

      page.on('error', (error) => this.emit('error', error))

      // Only let through document and script requests of the main frame, and
      // block further navigation once a non-redirect response has arrived
      page.on('request', (request) => {
        try {
          if (
            (responseReceived && request.isNavigationRequest()) ||
            request.frame() !== page.mainFrame() ||
            !['document', 'script'].includes(request.resourceType())
          ) {
            request.abort('blockedbyclient')
          } else {
            request.continue()
          }
        } catch (error) {
          this.emit('error', error)
        }
      })

      page.on('response', (response) => {
        try {
          if (response.url() !== target.href) {
            return
          }

          const status = response.status()
          const headers = response.headers()

          this.analyzedUrls[target.href] = { status }

          Object.keys(headers).forEach((key) => {
            this.headers[key] = [
              ...(this.headers[key] || []),
              ...(Array.isArray(headers[key]) ? headers[key] : [headers[key]])
            ]
          })

          this.contentType = headers['content-type'] || null

          if (status >= 300 && status < 400) {
            // Follow the redirect announced by this very response; resolving
            // against the current address also accepts relative locations
            if (headers.location) {
              const locations = [].concat(headers.location)

              target = new URL(locations[locations.length - 1], target)
            }
          } else {
            responseReceived = true
          }
        } catch (error) {
          this.emit('error', error)
        }
      })

      if (this.options.userAgent) {
        await page.setUserAgent(this.options.userAgent)
      }

      let timer

      try {
        await Promise.race([
          page.goto(target.href, { waitUntil: 'domcontentloaded' }),
          new Promise((resolve, reject) => {
            timer = setTimeout(
              () => reject(new Error('Timeout')),
              this.options.maxWait
            )
          })
        ])
      } catch (error) {
        // Navigation problems are reported but do not abort the analysis
        this.emit('error', error)
      } finally {
        // Do not leave the timer pending once the race has been decided
        clearTimeout(timer)
      }

      await sleep(1000)

      const links = await (
        await page.evaluateHandle(() =>
          // eslint-disable-next-line no-undef
          Array.from(document.getElementsByTagName('a')).map(
            ({ hash, hostname, href, pathname, protocol, rel }) => ({
              hash,
              hostname,
              href,
              pathname,
              protocol,
              rel
            })
          )
        )
      ).jsonValue()

      const scripts = (
        await (
          await page.evaluateHandle(() =>
            // eslint-disable-next-line no-undef
            Array.from(document.getElementsByTagName('script')).map(
              ({ src }) => src
            )
          )
        ).jsonValue()
      ).filter((script) => script)

      const js = processJs(
        await page.evaluate(getJs),
        this.wappalyzer.jsPatterns
      )

      const cookies = (await page.cookies()).map(
        ({ name, value, domain, path }) => ({
          name,
          value,
          domain,
          path
        })
      )

      const html = processHtml(
        await page.content(),
        this.options.htmlMaxCols,
        this.options.htmlMaxRows
      )

      // Validate response. After a redirect the target may have no entry at
      // all, which also counts as "no response"
      const visited = this.analyzedUrls[target.href]

      if (!visited || !visited.status) {
        throw new Error('NO_RESPONSE')
      }

      let language = null

      try {
        // Strip tags so that only the text content is considered
        const [attrs] = languageDetect.detect(
          html.replace(/<\/?[^>]+(>|$)/g, ' '),
          1
        )

        if (attrs) {
          ;[language] = attrs
        }
      } catch (error) {
        this.log(`${error} (${target.href})`, 'driver', 'error')
      }

      await this.wappalyzer.analyze(target, {
        cookies,
        headers: this.headers,
        html,
        js,
        scripts,
        language
      })

      const reducedLinks = links
        .filter(
          ({ protocol, rel, hostname, pathname }) =>
            protocol &&
            protocol.match(/https?:/) &&
            rel !== 'nofollow' &&
            hostname === target.hostname &&
            extensions.test(pathname)
        )
        .map(({ href }) => new URL(href.split('#')[0]))

      this.emit('goto', target)

      return reducedLinks
    } finally {
      // Always release the page, otherwise every visited URL leaves a page
      // open in the browser
      try {
        await page.close()
      } catch (error) {
        this.log(`${error} (${url.href})`, 'driver', 'error')
      }
    }
  }

  /**
   * Analyse a URL and, in recursive mode, the links found on it.
   *
   * Failures are not thrown: they are recorded under the URL in the result
   * with `status: 0` and an `error` of the form `{ type, message }`.
   *
   * @param {URL} [url=this.originalUrl] - Page to analyse.
   * @param {number} [index=1] - Multiplier for the `delay` option.
   * @param {number} [depth=1] - Current crawl depth, compared to `maxDepth`.
   * @returns {Promise<{urls: Object, applications: Object[], meta: Object}>}
   */
  async analyze(url = this.originalUrl, index = 1, depth = 1) {
    try {
      await sleep(this.options.delay * index)

      const links = await this.goto(url)

      if (links && this.options.recursive && depth < this.options.maxDepth) {
        await this.batch(links.slice(0, this.options.maxUrls), depth + 1)
      }
    } catch (error) {
      const code = error && error.message
      // Own-property check so that messages such as 'constructor' are not
      // mistaken for a known error code
      const known =
        Boolean(code) && Object.prototype.hasOwnProperty.call(errorTypes, code)
      const type = known ? code : 'UNKNOWN_ERROR'
      const message = known ? errorTypes[code] : 'Unknown error'

      this.analyzedUrls[url.href] = {
        status: 0,
        error: {
          type,
          message
        }
      }

      this.log(`${message} (${url.href})`, 'driver', 'error')
    }

    return {
      urls: this.analyzedUrls,
      applications: this.technologies,
      meta: this.meta
    }
  }

  /**
   * Analyse links in groups of `batchSize`: the pages of a group are
   * processed concurrently, the groups one after another.
   *
   * @param {URL[]} links - Links to analyse; emptied as groups are taken.
   * @param {number} depth - Crawl depth passed on to analyze().
   * @param {number} [batch=0] - Index of the current group.
   * @returns {Promise<void>}
   */
  async batch(links, depth, batch = 0) {
    if (links.length === 0) {
      return
    }

    const batched = links.splice(0, this.options.batchSize)

    await Promise.all(
      batched.map((link, index) => this.analyze(link, index, depth))
    )

    await this.batch(links, depth, batch + 1)
  }

  /**
   * Receive detections from Wappalyzer: store `meta` and append every
   * technology that has not been recorded yet (matched by name).
   *
   * @param {Object} technologies - Detections keyed by technology name.
   * @param {Object} meta - Stored as-is on `this.meta`.
   */
  displayApps(technologies, meta) {
    this.meta = meta

    Object.keys(technologies).forEach((name) => {
      const {
        confidenceTotal: confidence,
        version,
        props: { cats, icon, website, cpe }
      } = technologies[name]

      // Map of category id to category name
      const categories = cats.reduce((named, id) => {
        named[id] = json.categories[id].name

        return named
      }, {})

      if (!this.technologies.some(({ name: _name }) => name === _name)) {
        this.technologies.push({
          name,
          confidence,
          version: version || null,
          icon: icon || 'default.svg',
          website,
          cpe: cpe || null,
          categories
        })
      }
    })
  }
}
// Public API: the module exports the Driver class itself; the processJs and
// processHtml helpers are attached to it as properties.
module.exports = Driver
module.exports.processJs = processJs
module.exports.processHtml = processHtml