From d0203c6c17d08e007362a3ef122c9054d6637b1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zsombor=20Par=C3=B3czi?=
Date: Sun, 22 Dec 2019 21:00:13 +0100
Subject: [PATCH] Simple browser implementation for puppeteer

---
 src/drivers/npm/browsers/puppeteer.js | 252 ++++++++++++++++++++++++++
 src/drivers/npm/cli.js                |   7 +-
 src/drivers/npm/package.json          |   4 +
 3 files changed, 262 insertions(+), 1 deletion(-)
 create mode 100644 src/drivers/npm/browsers/puppeteer.js

diff --git a/src/drivers/npm/browsers/puppeteer.js b/src/drivers/npm/browsers/puppeteer.js
new file mode 100644
index 000000000..b2098a41f
--- /dev/null
+++ b/src/drivers/npm/browsers/puppeteer.js
@@ -0,0 +1,252 @@
+const { Cluster } = require('puppeteer-cluster');
+const Browser = require('../browser');
+
+// Shared puppeteer-cluster instance, launched lazily by the first visit().
+let cluster = null;
+
+// Statuses of redirect hops; the page itself arrives in a later response.
+const REDIRECT_STATUSES = [301, 302, 303, 307, 308];
+
+/**
+ * Builds a plain, serialisable snapshot of `window`, limited in depth.
+ *
+ * Passed to page.evaluate(), so it runs inside the page and must stay
+ * self-contained: it cannot use anything defined elsewhere in this module.
+ *
+ * Strings and numbers are copied, undefined/null become `false`, objects and
+ * functions are walked through Object.keys(), and everything else (including
+ * whatever sits at the depth limit or throws on access) becomes a boolean.
+ *
+ * @returns {Object} The snapshot of `window`.
+ */
+function puppeteerJsEvalFunction() {
+  const shallowMap = (origin, level) => {
+    try {
+      if (level === 0) {
+        return true;
+      }
+
+      if (typeof origin === 'string' || typeof origin === 'number') {
+        return origin;
+      }
+
+      if (typeof origin === 'undefined' || origin === null) {
+        return false;
+      }
+
+      if ((typeof origin === 'object' || typeof origin === 'function') && origin.hasOwnProperty) {
+        const ret = {};
+        Object.keys(origin)
+          .forEach((key) => {
+            ret[key] = shallowMap(origin[key], level - 1);
+          });
+        return ret;
+      }
+
+      return true;
+    } catch (err) {
+      return false;
+    }
+  };
+
+  // prevent cross-origin error
+  window.frames = [];
+
+  // eslint-disable-next-line no-undef
+  return shallowMap(window, 6);
+}
+
+
+/**
+ * Browser implementation that loads pages through a shared puppeteer-cluster.
+ *
+ * A visit fills in statusCode, contentType, headers, resources, scripts,
+ * links, js, cookies and html on the instance.
+ */
+class PuppeteerBrowser extends Browser {
+  /**
+   * @param {Object} [options] Shallow-merged over the defaults below, so a
+   *   caller-supplied `puppeteerClusterOptions` replaces the default object
+   *   as a whole. `userAgent` and `waitDuration` (milliseconds) are read
+   *   during a visit.
+   */
+  constructor(options) {
+    super();
+    this.options = Object.assign(
+      {},
+      {
+        puppeteerClusterOptions: {
+          concurrency: Cluster.CONCURRENCY_CONTEXT,
+          maxConcurrency: 4,
+          puppeteerOptions: {
+            // Headless by default, as with puppeteer itself; callers can
+            // still opt into a visible browser through their options.
+            headless: true,
+            ignoreHTTPSErrors: true,
+          },
+        },
+      },
+      options,
+    );
+    this.resources = [];
+
+    this.links = [];
+
+    this.window = {};
+    this.cookies = [];
+    this.scripts = [];
+    this.page = null;
+    this.js = {};
+  }
+
+  /**
+   * Queues a URL on the shared cluster, launching the cluster on first use.
+   *
+   * @param {string} visiturl URL to open.
+   * @returns {Promise<void>} Resolves once visitInternal() reports success
+   *   and rejects with the error it reports otherwise.
+   */
+  async visit(visiturl) {
+    let visitcb = null;
+    const newPromise = new Promise((resolve, reject) => {
+      visitcb = (err) => {
+        if (err) {
+          return reject(err);
+        }
+        return resolve();
+      };
+    });
+    // start cluster
+    if (!cluster) {
+      // the merged settings are stored on this.options
+      cluster = await Cluster.launch(this.options.puppeteerClusterOptions);
+      this.log('Cluster started', 'puppeteer');
+      await cluster.task(async ({ page, data: { url, cb, myContext } }) => {
+        await myContext.visitInternal(page, url, cb);
+      });
+    }
+
+    await cluster.queue({ url: visiturl, cb: visitcb, myContext: this });
+    return newPromise;
+  }
+
+  /**
+   * Cluster task body: loads `url` in `page` and stores what it finds on this
+   * instance. Failures are logged and handed to `cb` instead of being thrown.
+   *
+   * @param {Object} page Puppeteer page supplied by the cluster.
+   * @param {string} url URL to open.
+   * @param {Function} cb Called without arguments on success, or with the
+   *   error on failure.
+   */
+  async visitInternal(page, url, cb) {
+    this.log(`Opening: ${url}`, 'puppeteer');
+
+    this.resources = [];
+    this.links = [];
+    this.scripts = [];
+    // keyed by header name, so a plain object rather than an array
+    this.headers = {};
+    this.window = {};
+
+    try {
+      await page.setRequestInterception(true);
+
+      this.page = page;
+
+      page.on('request', (req) => {
+        req.continue();
+      });
+
+      page.on('response', (res) => {
+        // Skip redirect hops so the first recorded response is the page itself.
+        if (REDIRECT_STATUSES.includes(res.status())) {
+          return;
+        }
+        const headers = res.headers();
+
+        if (this.resources.length === 0) {
+          this.statusCode = res.status();
+          this.contentType = headers['content-type'];
+          Object.keys(headers).forEach((key) => {
+            if (Array.isArray(headers[key])) {
+              this.headers[key] = headers[key];
+            } else {
+              this.headers[key] = [headers[key]];
+            }
+          });
+        }
+
+        this.resources.push(res.url());
+
+        if (
+          headers['content-type']
+          && (headers['content-type'].indexOf('javascript') !== -1
+            || headers['content-type'].indexOf('application/') !== -1)
+        ) {
+          this.scripts.push(res.url());
+        }
+      });
+
+      // navigate
+      // setUserAgent() needs a string, so only override when one is configured
+      if (this.options.userAgent) {
+        await page.setUserAgent(this.options.userAgent);
+      }
+      try {
+        if (this.options.waitDuration) {
+          await Promise.race([
+            page.goto(url, {
+              timeout: this.options.waitDuration,
+              waitUntil: 'networkidle2',
+            }),
+            new Promise(x => setTimeout(x, this.options.waitDuration)),
+          ]);
+        } else {
+          await page.goto(url, {
+            waitUntil: 'networkidle2',
+          });
+        }
+      } catch (err) {
+        // best effort: keep whatever loaded before the navigation failed
+        this.log(err.toString(), 'puppeteer', 'error');
+      }
+
+      // get links
+      // eslint-disable-next-line no-undef
+      const list = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('a')).map(a => ({
+        href: a.href,
+        hostname: a.hostname,
+        pathname: a.pathname,
+        hash: a.hash,
+        protocol: a.protocol,
+      })));
+      this.links = await list.jsonValue();
+
+      // a very simple representation of the window object
+      this.js = await this.page.evaluate(puppeteerJsEvalFunction);
+
+      // get cookies
+      this.cookies = await page.cookies();
+      this.cookies = this.cookies.map((e) => {
+        e.key = e.name;
+        return e;
+      });
+
+      // get html
+      this.html = await page.content();
+
+      // close the page to free up memory
+      await page.close();
+      this.page = null;
+
+      // close everything
+      cb();
+    } catch (err) {
+      this.log(err.toString(), 'puppeteer', 'error');
+      cb(err);
+    }
+  }
+}
+
+module.exports = PuppeteerBrowser;
diff --git a/src/drivers/npm/cli.js b/src/drivers/npm/cli.js
index 3ca29287e..2354a8eef 100755
--- a/src/drivers/npm/cli.js
+++ b/src/drivers/npm/cli.js
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 
 const Wappalyzer = require('./driver');
-const Browser = require('./browsers/zombie');
+let Browser = require('./browsers/zombie');
 
 const args = process.argv.slice(2);
 
@@ -30,6 +30,11 @@ do {
   }
 } while (arg);
 
+if (options.browser && options.browser === 'puppeteer') {
+  // eslint-disable-next-line global-require
+  Browser = require('./browsers/puppeteer');
+}
+
 const wappalyzer = new Wappalyzer(Browser, url, options);
 
 wappalyzer.analyze()
diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json
index 81bba70e8..834f721ef 100644
--- a/src/drivers/npm/package.json
+++ b/src/drivers/npm/package.json
@@ -14,6 +14,7 @@
     "apps.json",
     "browser.js",
     "browsers/zombie.js",
+    "browsers/puppeteer.js",
     "cli.js",
     "driver.js",
     "index.js",
@@ -24,5 +25,8 @@
   },
   "dependencies": {
     "zombie": "^6.1.2"
+  },
+  "peerDependencies": {
+    "puppeteer-cluster": "^0.18.0"
   }
 }