Add puppeteer driver

main
Elbert Alias 4 years ago
parent e963572d84
commit be4e736052

@ -1,9 +1,13 @@
# editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
insert_final_newline = true
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.md]
trim_trailing_whitespace = false

@ -1,6 +1,20 @@
module.exports = {
"extends": "airbnb-base",
"rules": {
"no-param-reassign": 0
}
};
root: true,
env: {
browser: true,
node: true
},
parserOptions: {
parser: 'babel-eslint'
},
extends: [
'@nuxtjs',
'prettier',
'prettier/vue',
'plugin:prettier/recommended',
'plugin:nuxt/recommended'
],
plugins: [
'prettier'
],
}

@ -0,0 +1,5 @@
{
"semi": false,
"arrowParens": "always",
"singleQuote": true
}

1781
npm-shrinkwrap.json generated

File diff suppressed because it is too large Load Diff

@ -5,11 +5,17 @@
"read-chunk": "2.1.*"
},
"devDependencies": {
"chai": "^4.1.2",
"eslint": "^4.19.1",
"eslint-config-airbnb-base": "^13.0.0",
"eslint-plugin-import": "^2.13.0",
"mocha": "^5.2.0"
"@nuxtjs/eslint-config": "^1.0.1",
"@nuxtjs/eslint-module": "^1.2.0",
"babel-eslint": "^10.1.0",
"chai": "^4.2.0",
"dotenv": "^8.2.0",
"eslint": "^6.1.0",
"eslint-config-prettier": "^4.1.0",
"eslint-plugin-nuxt": "^0.5.2",
"eslint-plugin-prettier": "^3.1.3",
"mocha": "^5.2.0",
"prettier": "^1.16.4"
},
"scripts": {
"test": "mocha -R spec src",

@ -3369,8 +3369,7 @@
"icon": "elm.svg",
"js": {
"Elm.Main.init": "\\;version:0.19",
"Elm.Main.embed": "\\;version:0.18",
"Elm": "\\;confidence:50"
"Elm.Main.embed": "\\;version:0.18"
},
"website": "https://elm-lang.org/"
},

@ -1,218 +1,245 @@
const {
AWS_LAMBDA_FUNCTION_NAME,
CHROME_BIN,
} = process.env;
const { AWS_LAMBDA_FUNCTION_NAME, CHROME_BIN } = process.env
let chromium;
let puppeteer;
let chromium
let puppeteer
if (AWS_LAMBDA_FUNCTION_NAME) {
// eslint-disable-next-line global-require, import/no-unresolved
chromium = require('chrome-aws-lambda');
({ puppeteer } = chromium);
chromium = require('chrome-aws-lambda')
;({ puppeteer } = chromium)
} else {
// eslint-disable-next-line global-require
puppeteer = require('puppeteer');
puppeteer = require('puppeteer')
}
const Browser = require('../browser');
const Browser = require('../browser')
function getJs() {
const dereference = (obj, level = 0) => {
try {
// eslint-disable-next-line no-undef
if (level > 5 || (level && obj === window)) {
return '[Removed]';
return '[Removed]'
}
if (Array.isArray(obj)) {
obj = obj.map(item => dereference(item, level + 1));
obj = obj.map((item) => dereference(item, level + 1))
}
if (typeof obj === 'function' || (typeof obj === 'object' && obj !== null)) {
const newObj = {};
if (
typeof obj === 'function' ||
(typeof obj === 'object' && obj !== null)
) {
const newObj = {}
Object.keys(obj).forEach((key) => {
newObj[key] = dereference(obj[key], level + 1);
});
newObj[key] = dereference(obj[key], level + 1)
})
return newObj;
return newObj
}
return obj;
return obj
} catch (error) {
return undefined;
return undefined
}
};
}
// eslint-disable-next-line no-undef
return dereference(window);
return dereference(window)
}
class PuppeteerBrowser extends Browser {
constructor(options) {
options.maxWait = options.maxWait || 60;
options.maxWait = options.maxWait || 60
super(options);
super(options)
}
async visit(url) {
let done = false;
let browser;
let done = false
let browser
try {
await new Promise(async (resolve, reject) => {
try {
browser = await puppeteer.launch(chromium ? {
args: [...chromium.args, '--ignore-certificate-errors'],
defaultViewport: chromium.defaultViewport,
executablePath: await chromium.executablePath,
headless: chromium.headless,
} : {
args: ['--no-sandbox', '--headless', '--disable-gpu', '--ignore-certificate-errors'],
executablePath: CHROME_BIN,
});
browser = await puppeteer.launch(
chromium
? {
args: [...chromium.args, '--ignore-certificate-errors'],
defaultViewport: chromium.defaultViewport,
executablePath: await chromium.executablePath,
headless: chromium.headless
}
: {
args: [
'--no-sandbox',
'--headless',
'--disable-gpu',
'--ignore-certificate-errors'
],
executablePath: CHROME_BIN
}
)
browser.on('disconnected', () => {
if (!done) {
reject(new Error('browser: disconnected'));
reject(new Error('browser: disconnected'))
}
});
})
const page = await browser.newPage();
const page = await browser.newPage()
page.setDefaultTimeout(this.options.maxWait * 1.1);
page.setDefaultTimeout(this.options.maxWait * 1.1)
await page.setRequestInterception(true);
await page.setRequestInterception(true)
page.on('error', error => reject(new Error(`page error: ${error.message || error}`)));
page.on('error', (error) =>
reject(new Error(`page error: ${error.message || error}`))
)
let responseReceived = false;
let responseReceived = false
page.on('request', (request) => {
try {
if (
responseReceived
&& request.isNavigationRequest()
&& request.frame() === page.mainFrame()
&& request.url() !== url
responseReceived &&
request.isNavigationRequest() &&
request.frame() === page.mainFrame() &&
request.url() !== url
) {
this.log(`abort navigation to ${request.url()}`);
this.log(`abort navigation to ${request.url()}`)
request.abort('aborted');
request.abort('aborted')
} else if (!done) {
if (!['document', 'script'].includes(request.resourceType())) {
request.abort();
request.abort()
} else {
request.continue();
request.continue()
}
}
} catch (error) {
reject(new Error(`page error: ${error.message || error}`));
reject(new Error(`page error: ${error.message || error}`))
}
});
})
page.on('response', (response) => {
try {
if (!this.statusCode) {
this.statusCode = response.status();
this.statusCode = response.status()
this.headers = {};
this.headers = {}
const headers = response.headers();
const headers = response.headers()
Object.keys(headers).forEach((key) => {
this.headers[key] = Array.isArray(headers[key]) ? headers[key] : [headers[key]];
});
this.headers[key] = Array.isArray(headers[key])
? headers[key]
: [headers[key]]
})
this.contentType = headers['content-type'] || null;
this.contentType = headers['content-type'] || null
}
if (response.status() < 300 || response.status() > 399) {
responseReceived = true;
responseReceived = true
}
} catch (error) {
reject(new Error(`page error: ${error.message || error}`));
reject(new Error(`page error: ${error.message || error}`))
}
});
})
page.on('console', ({ _type, _text, _location }) => {
if (!/Failed to load resource: net::ERR_FAILED/.test(_text)) {
this.log(`${_text} (${_location.url}: ${_location.lineNumber})`, _type);
this.log(
`${_text} (${_location.url}: ${_location.lineNumber})`,
_type
)
}
});
})
if (this.options.userAgent) {
await page.setUserAgent(this.options.userAgent);
await page.setUserAgent(this.options.userAgent)
}
try {
await Promise.race([
page.goto(url, { waitUntil: 'domcontentloaded' }),
// eslint-disable-next-line no-shadow
new Promise((resolve, reject) => setTimeout(() => reject(new Error('timeout')), this.options.maxWait)),
]);
new Promise((resolve, reject) =>
setTimeout(
() => reject(new Error('timeout')),
this.options.maxWait
)
)
])
} catch (error) {
throw new Error(error.message || error.toString());
throw new Error(error.message || error.toString())
}
// eslint-disable-next-line no-undef
const links = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('a')).map(({
hash, hostname, href, pathname, protocol, rel,
}) => ({
hash,
hostname,
href,
pathname,
protocol,
rel,
})));
this.links = await links.jsonValue();
const links = await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('a')).map(
({ hash, hostname, href, pathname, protocol, rel }) => ({
hash,
hostname,
href,
pathname,
protocol,
rel
})
)
)
this.links = await links.jsonValue()
// eslint-disable-next-line no-undef
const scripts = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('script')).map(({
src,
}) => src));
const scripts = await page.evaluateHandle(() =>
Array.from(document.getElementsByTagName('script')).map(
({ src }) => src
)
)
this.scripts = (await scripts.jsonValue()).filter(script => script);
this.scripts = (await scripts.jsonValue()).filter((script) => script)
this.js = await page.evaluate(getJs);
this.js = await page.evaluate(getJs)
this.cookies = (await page.cookies()).map(({
name, value, domain, path,
}) => ({
name, value, domain, path,
}));
this.cookies = (await page.cookies()).map(
({ name, value, domain, path }) => ({
name,
value,
domain,
path
})
)
this.html = await page.content();
this.html = await page.content()
resolve();
resolve()
} catch (error) {
reject(new Error(`visit error: ${error.message || error}`));
reject(new Error(`visit error: ${error.message || error}`))
}
});
})
} catch (error) {
this.log(`visit error: ${error.message || error} (${url})`, 'error');
this.log(`visit error: ${error.message || error} (${url})`, 'error')
throw new Error(error.message || error.toString());
throw new Error(error.message || error.toString())
} finally {
done = true;
done = true
if (browser) {
try {
await browser.close();
await browser.close()
this.log('browser close ok');
this.log('browser close ok')
} catch (error) {
this.log(`browser close error: ${error.message || error}`, 'error');
this.log(`browser close error: ${error.message || error}`, 'error')
}
}
}
this.log(`visit ok (${url})`);
this.log(`visit ok (${url})`)
}
}
module.exports = PuppeteerBrowser;
module.exports = PuppeteerBrowser

@ -0,0 +1,3 @@
/apps.json
/wappalyzer.js
/node_modules

@ -0,0 +1,34 @@
# Image for running the Wappalyzer CLI against the system Chromium package.
FROM node:12-alpine

# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
LABEL maintainer="Wappalyzer <info@wappalyzer.com>"

ENV WAPPALYZER_ROOT=/opt/wappalyzer
# Chromium comes from the Alpine package below, so Puppeteer must not download its own copy.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV CHROME_BIN=/usr/bin/chromium-browser

# --no-cache fetches the package index on the fly, so a separate `apk update` is not needed.
RUN apk add --no-cache \
  nodejs \
  nodejs-npm \
  udev \
  chromium \
  ttf-freefont

RUN mkdir -p "$WAPPALYZER_ROOT/browsers"

WORKDIR "$WAPPALYZER_ROOT"

# COPY is used for plain local files; the trailing slash makes the directory destination explicit.
COPY apps.json .
COPY browser.js .
COPY browsers/zombie.js ./browsers/
COPY browsers/puppeteer.js ./browsers/
COPY cli.js .
COPY driver.js .
COPY index.js .
COPY package.json .
COPY wappalyzer.js .

RUN npm i && npm i puppeteer

# Fail the build early if the Chromium binary is missing or broken.
RUN /usr/bin/chromium-browser --version

ENTRYPOINT ["node", "cli.js"]

@ -0,0 +1,96 @@
# Wappalyzer
[Wappalyzer](https://www.wappalyzer.com/) is a
[cross-platform](https://www.wappalyzer.com/nodejs) utility that uncovers the
technologies used on websites. It detects
[content management systems](https://www.wappalyzer.com/technologies/cms), [ecommerce platforms](https://www.wappalyzer.com/technologies/ecommerce), [web servers](https://www.wappalyzer.com/technologies/web-servers), [JavaScript frameworks](https://www.wappalyzer.com/technologies/javascript-frameworks),
[analytics tools](https://www.wappalyzer.com/technologies/analytics) and
[many more](https://www.wappalyzer.com/technologies).
## Installation
```shell
$ npm i -g wappalyzer # Globally
$ npm i wappalyzer --save # As a dependency
```
To use Puppeteer (headless Chrome browser), you must install the NPM package manually:
```shell
$ npm i puppeteer@^2.0.0
```
## Run from the command line
```
wappalyzer <url> [options]
```
### Options
```
-b, --browser=... Specify which headless browser to use (zombie or puppeteer)
-c, --chunk-size=... Process links in chunks
-d, --debug Output debug messages
-t, --delay=ms Wait for ms milliseconds between requests
-h, --help This text
--html-max-cols=... Limit the number of HTML characters per line processed
--html-max-rows=... Limit the number of HTML lines processed
-D, --max-depth=... Don't analyse pages more than num levels deep
-m, --max-urls=... Exit when num URLs have been analysed
-w, --max-wait=... Wait no more than ms milliseconds for page resources to load
-p, --password=... Password to be used for basic HTTP authentication (zombie only)
-P, --pretty Pretty-print JSON output
--proxy=... Proxy URL, e.g. 'http://user:pass@proxy:8080' (zombie only)
-r, --recursive Follow links on pages (crawler)
-a, --user-agent=... Set the user agent string
-u, --username=... Username to be used for basic HTTP authentication (zombie only)
```
## Run from a script
```javascript
const Wappalyzer = require('wappalyzer');
const url = 'https://www.wappalyzer.com';
const options = {
// browser: 'puppeteer',
debug: false,
delay: 500,
maxDepth: 3,
maxUrls: 10,
maxWait: 5000,
recursive: true,
userAgent: 'Wappalyzer',
htmlMaxCols: 2000,
htmlMaxRows: 2000,
};
const wappalyzer = new Wappalyzer(url, options);
// Optional: capture log output
// wappalyzer.on('log', params => {
// const { message, source, type } = params;
// });
// Optional: do something on page visit
// wappalyzer.on('visit', params => {
// const { browser, pageUrl } = params;
// });
wappalyzer.analyze()
.then((json) => {
process.stdout.write(`${JSON.stringify(json, null, 2)}\n`);
process.exit(0);
})
.catch((error) => {
process.stderr.write(`${error}\n`);
process.exit(1);
});
```

@ -0,0 +1,20 @@
/**
 * Base class for headless-browser adapters.
 *
 * The constructor stores the options it is given and initialises every
 * result field to `null`.
 */
class Browser {
  /**
   * @param {object} options - Adapter options, stored as-is on `this.options`.
   */
  constructor(options) {
    this.options = options;

    this.window = null;
    this.document = null;

    // Response metadata (each field is initialised exactly once)
    this.statusCode = null;
    this.contentType = null;
    this.headers = null;

    // Page content
    this.html = null;
    this.js = null;
    this.links = null;
    this.scripts = null;
    this.cookies = null;
  }
}
module.exports = Browser;

@ -0,0 +1,109 @@
#!/usr/bin/env node
const Wappalyzer = require('./driver')
// Command-line entry point: parses arguments into driver options, analyses a
// single URL and prints the result as JSON on stdout.
const args = process.argv.slice(2)

const options = {}

let url
let arg

// Short flag -> option name
const aliases = {
  a: 'userAgent',
  b: 'batchSize',
  d: 'debug',
  t: 'delay',
  h: 'help',
  D: 'maxDepth',
  m: 'maxUrls',
  P: 'pretty',
  r: 'recursive',
  w: 'maxWait'
}

// eslint-disable-next-line no-constant-condition
while (true) {
  arg = args.shift()

  if (!arg) {
    break
  }

  // Matches "-k", "--key", "-k=value" and "--key=value"
  const matches = /^-?-([^=]+)(?:=(.+)?)?/.exec(arg)

  if (matches) {
    // Resolve short aliases, otherwise convert kebab-case to camelCase
    const key =
      aliases[matches[1]] ||
      matches[1].replace(/-\w/g, (_matches) => _matches[1].toUpperCase())

    // Value precedence: "=value" suffix, then the next non-flag argument,
    // otherwise the option is a boolean switch
    // eslint-disable-next-line no-nested-ternary
    const value = matches[2]
      ? matches[2]
      : args[0] && !args[0].startsWith('-')
      ? args.shift()
      : true

    options[key] = value
  } else {
    // Anything that is not a flag is taken as the URL (the last one wins)
    url = arg
  }
}

if (!url || options.help) {
  process.stdout.write(`Usage:
wappalyzer <url> [options]
Examples:
wappalyzer https://www.example.com
node cli.js https://www.example.com -r -D 3 -m 50
docker run wappalyzer/cli https://www.example.com --pretty
Options:
-b, --batch-size=... Process links in batches
-d, --debug Output debug messages
-t, --delay=ms Wait for ms milliseconds between requests
-h, --help This text
--html-max-cols=... Limit the number of HTML characters per line processed
--html-max-rows=... Limit the number of HTML lines processed
-D, --max-depth=... Don't analyse pages more than num levels deep
-m, --max-urls=... Exit when num URLs have been analysed
-w, --max-wait=... Wait no more than ms milliseconds for page resources to load
-P, --pretty Pretty-print JSON output
-r, --recursive Follow links on pages (crawler)
-a, --user-agent=... Set the user agent string
`)

  // Explicitly asking for help is not an error; a missing URL is
  process.exit(options.help ? 0 : 1)
}

;(async function() {
  const wappalyzer = new Wappalyzer(options)

  let exitCode = 0

  try {
    await wappalyzer.init()

    const site = wappalyzer.open(url)

    site.on('error', (error) => {
      process.stderr.write(`page error: ${error}\n`)
    })

    await new Promise((resolve) => setTimeout(resolve, 1000))

    const results = await site.analyze()

    process.stdout.write(
      `${JSON.stringify(results, null, options.pretty ? 2 : null)}\n`
    )
  } catch (error) {
    process.stderr.write(`${error}\n`)

    exitCode = 1
  }

  // Shut the browser down exactly once on every path; a failing shutdown is
  // reported rather than left as an unhandled rejection
  try {
    await wappalyzer.destroy()
  } catch (error) {
    process.stderr.write(`${error}\n`)

    exitCode = 1
  }

  process.exit(exitCode)
})()

@ -0,0 +1,540 @@
const { URL } = require('url')
const fs = require('fs')
const LanguageDetect = require('languagedetect')
const Wappalyzer = require('./wappalyzer')
const { AWS_LAMBDA_FUNCTION_NAME } = process.env

// Puppeteer implementation, chosen by runtime environment
let puppeteer

if (AWS_LAMBDA_FUNCTION_NAME) {
  // On AWS Lambda use the puppeteer bundled with chrome-aws-lambda. It is a
  // property of the module export itself, not of a nested `chromium` object.
  // eslint-disable-next-line global-require, import/no-unresolved
  ;({ puppeteer } = require('chrome-aws-lambda'))
} else {
  // eslint-disable-next-line global-require
  puppeteer = require('puppeteer')
}
const languageDetect = new LanguageDetect()

languageDetect.setLanguageType('iso2')

// Resolve apps.json next to this module rather than against the current
// working directory, so the CLI works from any directory
const json = JSON.parse(fs.readFileSync(`${__dirname}/apps.json`))

// Pathnames considered crawlable: either no dot anywhere (no file extension),
// or ending in a known page extension. The extension alternative must allow a
// leading path, since a pathname always starts with "/".
const extensions = /^([^.]+|.*\.(asp|aspx|cgi|htm|html|jsp|php))$/

// Error codes (thrown as Error messages) mapped to human-readable text
const errorTypes = {
  RESPONSE_NOT_OK: 'Response was not ok',
  NO_RESPONSE: 'No response from server',
  NO_HTML_DOCUMENT: 'No HTML document'
}
/**
 * Returns a promise that resolves (with no value) after a delay.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
}
/**
 * Builds a plain-data copy of the global `window` object.
 *
 * The function is handed to `page.evaluate`, so it is kept self-contained and
 * references nothing outside its own body except the `window` global.
 *
 * - Anything nested deeper than five levels, and any nested reference back to
 *   `window`, is replaced by the string '[Removed]'.
 * - Functions and objects (arrays included) become plain objects holding
 *   their own enumerable keys.
 * - If copying a value throws, that value becomes `undefined`.
 *
 * @returns {*} The copied structure
 */
function getJs() {
  const MAX_DEPTH = 5

  function snapshot(value, depth = 0) {
    try {
      // eslint-disable-next-line no-undef
      if (depth > MAX_DEPTH || (depth > 0 && value === window)) {
        return '[Removed]'
      }

      // Arrays are copied element-wise first, then treated like any object
      const source = Array.isArray(value)
        ? value.map((entry) => snapshot(entry, depth + 1))
        : value

      const kind = typeof source

      // Primitives and null pass through untouched
      if (kind !== 'function' && (kind !== 'object' || source === null)) {
        return source
      }

      const copy = {}

      for (const key of Object.keys(source)) {
        copy[key] = snapshot(source[key], depth + 1)
      }

      return copy
    } catch (error) {
      return undefined
    }
  }

  // eslint-disable-next-line no-undef
  return snapshot(window)
}
/**
 * Looks up every JavaScript property chain named in `patterns` on a window
 * snapshot.
 *
 * @param {object} window - Object to resolve the dotted chains against.
 * @param {object} patterns - Map of app name -> chain -> array of patterns.
 * @returns {object} Map of app name -> chain -> pattern index -> value. A
 *   string or number is stored as-is and anything else as `true`; falsy
 *   lookups leave no entry.
 */
function processJs(window, patterns) {
  // Follow a dotted chain from the snapshot; any falsy step yields null
  const lookup = (chain) => {
    let node = window

    for (const property of chain.split('.')) {
      node = node && node[property] ? node[property] : null
    }

    return node
  }

  const results = {}

  for (const appName of Object.keys(patterns)) {
    const appResults = {}

    results[appName] = appResults

    for (const chain of Object.keys(patterns[appName])) {
      const matches = {}

      appResults[chain] = matches

      patterns[appName][chain].forEach((_pattern, index) => {
        const raw = lookup(chain)
        const isScalar = typeof raw === 'string' || typeof raw === 'number'
        const value = isScalar ? raw : Boolean(raw)

        if (value) {
          matches[index] = value
        }
      })
    }
  }

  return results
}
/**
 * Reduces the amount of HTML to be analysed.
 *
 * The markup is cut into chunks of `maxCols` characters ("rows"). Rows near
 * the start (index below `maxRows / 2`) and near the end (index above
 * `rows - maxRows / 2`) are kept and joined with newlines; the middle is
 * dropped.
 *
 * Truncation needs both limits. If either is missing, zero, negative or not a
 * finite number, the HTML is returned unchanged. Without this guard a missing
 * `maxCols` makes the row count infinite (the loop never ends) and a missing
 * `maxRows` silently discards the whole document.
 *
 * @param {string} html - Markup to truncate.
 * @param {number} maxCols - Characters per row.
 * @param {number} maxRows - Approximate number of rows to keep.
 * @returns {string} The truncated (or original) HTML.
 */
function processHtml(html, maxCols, maxRows) {
  const cols = Number(maxCols)
  const keep = Number(maxRows)

  const isLimit = (limit) => Number.isFinite(limit) && limit > 0

  if (!isLimit(cols) || !isLimit(keep)) {
    return html
  }

  const batches = []
  const rows = html.length / cols

  for (let i = 0; i < rows; i += 1) {
    if (i < keep / 2 || i > rows - keep / 2) {
      batches.push(html.slice(i * cols, (i + 1) * cols))
    }
  }

  return batches.join('\n')
}
/**
 * Owns the headless browser instance and the normalised crawl options, and
 * creates `Site` objects that share both.
 */
class Driver {
  /**
   * @param {object} [options] - Overrides for the defaults listed below.
   *   Values may be strings (e.g. from the command line); they are coerced to
   *   booleans and integers here.
   */
  constructor(options = {}) {
    this.options = {
      batchSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      ...options
    }

    // Flags go through a numeric cast, so true, 1 and '1' enable them
    this.options.debug = Boolean(+this.options.debug)
    this.options.recursive = Boolean(+this.options.recursive)

    // The delay between requests only applies when crawling recursively
    this.options.delay = this.options.recursive
      ? parseInt(this.options.delay, 10)
      : 0
    this.options.maxDepth = parseInt(this.options.maxDepth, 10)
    this.options.maxUrls = parseInt(this.options.maxUrls, 10)
    this.options.maxWait = parseInt(this.options.maxWait, 10)
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10)
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10)

    this.destroyed = false
  }

  /**
   * Launches the browser and stores it on `this.browser`. If the browser
   * disconnects before `destroy()` was called, a relaunch is attempted.
   *
   * @throws {Error} When the browser cannot be launched.
   */
  async init() {
    this.log('Launching browser...')

    try {
      this.browser = await puppeteer.launch({
        args: [
          '--no-sandbox',
          '--headless',
          '--disable-gpu',
          '--ignore-certificate-errors'
        ]
      })

      this.browser.on('disconnected', async () => {
        this.log('Browser disconnected')

        if (!this.destroyed) {
          // Nothing awaits this handler, so a failed relaunch must be caught
          // here or it becomes an unhandled promise rejection
          try {
            await this.init()
          } catch (error) {
            this.log(
              `Failed to relaunch browser: ${error.message || error}`,
              'driver',
              'error'
            )
          }
        }
      })
    } catch (error) {
      throw new Error(error.toString())
    }
  }

  /**
   * Marks the driver as destroyed (which disables relaunching) and closes the
   * browser if one was launched.
   *
   * @throws {Error} When closing the browser fails.
   */
  async destroy() {
    this.destroyed = true

    if (this.browser) {
      try {
        await sleep(1)

        await this.browser.close()

        this.log('Done')
      } catch (error) {
        throw new Error(error.toString())
      }
    }
  }

  /**
   * Creates a `Site` for `url` that is bound to this driver.
   *
   * @param {string} url
   * @returns {Site}
   */
  open(url) {
    return new Site(url, this)
  }

  /**
   * Writes a message to the console when the `debug` option is enabled.
   *
   * @param {string} message
   * @param {string} [source='driver']
   * @param {string} [type='debug']
   */
  log(message, source = 'driver', type = 'debug') {
    if (this.options.debug) {
      // eslint-disable-next-line no-console
      console.log(`${type.toUpperCase()} | ${source} | ${message}`)
    }
  }
}
/**
 * A crawl session for one starting URL. Pages are loaded through the driver's
 * browser, handed to a Wappalyzer instance, and the detected technologies are
 * accumulated on this object.
 *
 * Events (subscribe with `on`):
 * - 'log': every log call, with the same arguments as `log`
 * - 'error': page, request, response and navigation errors
 * - 'goto': the URL of a page after it has been analysed
 */
class Site {
  /**
   * @param {string} url - Starting URL; must be parseable by `URL`.
   * @param {object} driver - Provides `options`, `browser` and `log`.
   * @throws {Error} When `url` is not a valid URL.
   */
  constructor(url, driver) {
    ;({ options: this.options, browser: this.browser } = driver)

    this.driver = driver

    try {
      this.originalUrl = new URL(url)
    } catch (error) {
      throw new Error(error.message || error.toString())
    }

    this.wappalyzer = new Wappalyzer()

    this.wappalyzer.apps = json.apps
    this.wappalyzer.categories = json.categories

    this.wappalyzer.parseJsPatterns()

    // Route the analyser's callbacks back into this site
    this.wappalyzer.driver.log = (message, source, type) =>
      this.log(message, source, type)
    this.wappalyzer.driver.displayApps = (detected, meta, context) =>
      this.displayApps(detected, meta, context)

    this.analyzedUrls = {}
    this.technologies = []
    this.meta = {}

    this.listeners = {}

    this.headers = {}
  }

  async init() {}

  /**
   * Registers a listener for an event.
   *
   * @param {string} event
   * @param {Function} callback
   */
  on(event, callback) {
    if (!this.listeners[event]) {
      this.listeners[event] = []
    }

    this.listeners[event].push(callback)
  }

  /**
   * Calls every listener of `event` with all of the given arguments.
   *
   * @param {string} event
   * @param {...*} params
   */
  emit(event, ...params) {
    if (this.listeners[event]) {
      this.listeners[event].forEach((listener) => listener(...params))
    }
  }

  /**
   * Emits a 'log' event and forwards the same arguments to the driver's log.
   */
  log(...args) {
    this.emit('log', ...args)

    this.driver.log(...args)
  }

  async fetch(url, index, depth) {}

  /**
   * Loads one page, analyses it and collects the same-host links on it.
   *
   * @param {URL} url - Page to load.
   * @returns {Promise<URL[]|undefined>} Links found on the page, or
   *   `undefined` when the URL is a duplicate or `maxUrls` has been reached.
   * @throws {Error} 'Browser closed' when there is no browser, 'NO_RESPONSE'
   *   when no response with a status was recorded for the final URL.
   */
  async goto(url) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if (
      this.analyzedUrls[url.href] ||
      Object.keys(this.analyzedUrls).length >= this.options.maxUrls
    ) {
      return
    }

    this.log(`Navigate to ${url}`, 'page')

    this.analyzedUrls[url.href] = {
      status: 0
    }

    if (!this.browser) {
      throw new Error('Browser closed')
    }

    const page = await this.browser.newPage()

    try {
      page.setDefaultTimeout(this.options.maxWait)

      await page.setRequestInterception(true)

      page.on('error', (error) => this.emit('error', error))

      let responseReceived = false

      page.on('request', (request) => {
        try {
          // Only the main frame's documents and scripts are allowed, and no
          // further navigation once a final response has arrived
          if (
            (responseReceived && request.isNavigationRequest()) ||
            request.frame() !== page.mainFrame() ||
            !['document', 'script'].includes(request.resourceType())
          ) {
            request.abort('blockedbyclient')
          } else {
            request.continue()
          }
        } catch (error) {
          this.emit('error', error)
        }
      })

      page.on('response', (response) => {
        try {
          if (response.url() === url.href) {
            this.analyzedUrls[url.href] = {
              status: response.status()
            }

            const headers = response.headers()

            Object.keys(headers).forEach((key) => {
              this.headers[key] = [
                ...(this.headers[key] || []),
                ...(Array.isArray(headers[key]) ? headers[key] : [headers[key]])
              ]
            })

            this.contentType = headers['content-type'] || null

            if (response.status() >= 300 && response.status() < 400) {
              if (this.headers.location) {
                // Follow the most recent Location header; resolving against
                // the current URL also handles relative redirects
                const [location] = this.headers.location.slice(-1)

                url = new URL(location, url)
              }
            } else {
              responseReceived = true
            }
          }
        } catch (error) {
          this.emit('error', error)
        }
      })

      if (this.options.userAgent) {
        await page.setUserAgent(this.options.userAgent)
      }

      let timer

      try {
        await Promise.race([
          page.goto(url.href, { waitUntil: 'domcontentloaded' }),
          new Promise((resolve, reject) => {
            timer = setTimeout(
              () => reject(new Error('Timeout')),
              this.options.maxWait
            )
          })
        ])
      } catch (error) {
        this.emit('error', error)
      } finally {
        // Do not leave the timeout running once navigation has settled
        clearTimeout(timer)
      }

      await sleep(1000)

      const links = await (
        await page.evaluateHandle(() =>
          // eslint-disable-next-line no-undef
          Array.from(document.getElementsByTagName('a')).map(
            ({ hash, hostname, href, pathname, protocol, rel }) => ({
              hash,
              hostname,
              href,
              pathname,
              protocol,
              rel
            })
          )
        )
      ).jsonValue()

      const scripts = (
        await (
          await page.evaluateHandle(() =>
            // eslint-disable-next-line no-undef
            Array.from(document.getElementsByTagName('script')).map(
              ({ src }) => src
            )
          )
        ).jsonValue()
      ).filter((script) => script)

      const js = processJs(
        await page.evaluate(getJs),
        this.wappalyzer.jsPatterns
      )

      const cookies = (await page.cookies()).map(
        ({ name, value, domain, path }) => ({
          name,
          value,
          domain,
          path
        })
      )

      const html = processHtml(
        await page.content(),
        this.options.htmlMaxCols,
        this.options.htmlMaxRows
      )

      // Validate response. After a redirect there may be no entry at all for
      // the final URL, which also counts as "no response".
      const analyzed = this.analyzedUrls[url.href]

      if (!analyzed || !analyzed.status) {
        throw new Error('NO_RESPONSE')
      }

      let language = null

      try {
        // Strip tags before detecting the language
        const [attrs] = languageDetect.detect(
          html.replace(/<\/?[^>]+(>|$)/g, ' '),
          1
        )

        if (attrs) {
          ;[language] = attrs
        }
      } catch (error) {
        this.log(`${error} (${url.href})`, 'driver', 'error')
      }

      await this.wappalyzer.analyze(url, {
        cookies,
        headers: this.headers,
        html,
        js,
        scripts,
        language
      })

      // Keep http(s) links on the same host that are not rel="nofollow" and
      // whose pathname passes the extensions filter; fragments are dropped
      const reducedLinks = Array.prototype.reduce.call(
        links,
        (results, link) => {
          if (
            results &&
            Object.prototype.hasOwnProperty.call(
              Object.getPrototypeOf(results),
              'push'
            ) &&
            link.protocol &&
            link.protocol.match(/https?:/) &&
            link.rel !== 'nofollow' &&
            link.hostname === url.hostname &&
            extensions.test(link.pathname)
          ) {
            results.push(new URL(link.href.split('#')[0]))
          }

          return results
        },
        []
      )

      this.emit('goto', url)

      return reducedLinks
    } finally {
      // Always release the page, otherwise every visited URL leaks a tab
      try {
        await page.close()
      } catch (error) {
        this.log(`${error} (${url.href})`, 'driver', 'error')
      }
    }
  }

  /**
   * Analyses `url` and, in recursive mode, the links found on it.
   *
   * Errors are recorded on the URL's entry in `analyzedUrls`, not thrown.
   *
   * @param {URL} [url] - Defaults to the site's original URL.
   * @param {number} [index=1] - Position in the batch; scales the delay.
   * @param {number} [depth=1] - Current crawl depth.
   * @returns {Promise<{urls: object, applications: object[], meta: object}>}
   */
  async analyze(url = this.originalUrl, index = 1, depth = 1) {
    try {
      await sleep(this.options.delay * index)

      const links = await this.goto(url)

      if (links && this.options.recursive && depth < this.options.maxDepth) {
        await this.batch(links.slice(0, this.options.maxUrls), depth + 1)
      }
    } catch (error) {
      // Known error codes get their description; anything else is unknown
      const type =
        error.message && errorTypes[error.message]
          ? error.message
          : 'UNKNOWN_ERROR'
      const message =
        error.message && errorTypes[error.message]
          ? errorTypes[error.message]
          : 'Unknown error'

      this.analyzedUrls[url.href] = {
        status: 0,
        error: {
          type,
          message
        }
      }

      this.log(`${message} (${url.href})`, 'driver', 'error')
    }

    return {
      urls: this.analyzedUrls,
      applications: this.technologies,
      meta: this.meta
    }
  }

  /**
   * Analyses links in groups of `batchSize`, one group after another.
   * `links` is consumed (emptied) in the process.
   *
   * @param {URL[]} links
   * @param {number} depth
   * @param {number} [batch=0] - Running batch counter.
   */
  async batch(links, depth, batch = 0) {
    if (links.length === 0) {
      return
    }

    const batched = links.splice(0, this.options.batchSize)

    await Promise.all(
      batched.map((link, index) => this.analyze(link, index, depth))
    )

    await this.batch(links, depth, batch + 1)
  }

  /**
   * Analyser callback: stores `meta` and appends each newly detected
   * technology (deduplicated by name) to `this.technologies`.
   *
   * @param {object} technologies - Detected technologies keyed by name.
   * @param {object} meta
   */
  displayApps(technologies, meta) {
    this.meta = meta

    Object.keys(technologies).forEach((name) => {
      const {
        confidenceTotal: confidence,
        version,
        props: { cats, icon, website, cpe }
      } = technologies[name]

      // Map category ids to their names
      const categories = cats.reduce((named, id) => {
        named[id] = json.categories[id].name

        return named
      }, {})

      if (!this.technologies.some(({ name: _name }) => name === _name)) {
        this.technologies.push({
          name,
          confidence,
          version: version || null,
          icon: icon || 'default.svg',
          website,
          cpe: cpe || null,
          categories
        })
      }
    })
  }
}
module.exports = Driver
module.exports.processJs = processJs
module.exports.processHtml = processHtml

@ -0,0 +1,12 @@
const Driver = require('./driver');
class Wappalyzer {
constructor(pageUrl, options) {
// eslint-disable-next-line import/no-dynamic-require, global-require
const Browser = require(`./browsers/${options.browser || 'zombie'}`);
return new Driver(Browser, pageUrl, options);
}
}
module.exports = Wappalyzer;

@ -0,0 +1,30 @@
{
"name": "wappalyzer",
"description": "Identify technology on websites",
"homepage": "https://www.wappalyzer.com",
"version": "6.0.0",
"author": "Wappalyzer",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/aliasio/wappalyzer"
},
"funding": {
"url": "https://github.com/sponsors/aliasio"
},
"main": "index.js",
"files": [
"apps.json",
"cli.js",
"driver.js",
"index.js",
"wappalyzer.js"
],
"bin": {
"wappalyzer": "./cli.js"
},
"dependencies": {
"languagedetect": "^2.0.0",
"puppeteer": "^2.0.0"
}
}

@ -0,0 +1,296 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1
"@types/mime-types@^2.1.0":
version "2.1.0"
resolved "https://registry.yarnpkg.com/@types/mime-types/-/mime-types-2.1.0.tgz#9ca52cda363f699c69466c2a6ccdaad913ea7a73"
integrity sha1-nKUs2jY/aZxpRmwqbM2q2RPqenM=
agent-base@5:
version "5.1.1"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-5.1.1.tgz#e8fb3f242959db44d63be665db7a8e739537a32c"
integrity sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==
async-limiter@~1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/async-limiter/-/async-limiter-1.0.1.tgz#dd379e94f0db8310b08291f9d64c3209766617fd"
integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==
balanced-match@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==
dependencies:
balanced-match "^1.0.0"
concat-map "0.0.1"
buffer-crc32@~0.2.3:
version "0.2.13"
resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI=
buffer-from@^1.0.0:
version "1.1.1"
resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef"
integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==
concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
concat-stream@^1.6.2:
version "1.6.2"
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.6.2.tgz#904bdf194cd3122fc675c77fc4ac3d4ff0fd1a34"
integrity sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==
dependencies:
buffer-from "^1.0.0"
inherits "^2.0.3"
readable-stream "^2.2.2"
typedarray "^0.0.6"
core-util-is@~1.0.0:
version "1.0.2"
resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=
debug@4, debug@^4.1.0:
version "4.1.1"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791"
integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==
dependencies:
ms "^2.1.1"
debug@^2.6.9:
version "2.6.9"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
dependencies:
ms "2.0.0"
extract-zip@^1.6.6:
version "1.7.0"
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-1.7.0.tgz#556cc3ae9df7f452c493a0cfb51cc30277940927"
integrity sha512-xoh5G1W/PB0/27lXgMQyIhP5DSY/LhoCsOyZgb+6iMmRtCwVBo55uKaMoEYrDCKQhWvqEip5ZPKAc6eFNyf/MA==
dependencies:
concat-stream "^1.6.2"
debug "^2.6.9"
mkdirp "^0.5.4"
yauzl "^2.10.0"
fd-slicer@~1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
integrity sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4=
dependencies:
pend "~1.2.0"
fs.realpath@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8=
glob@^7.1.3:
version "7.1.6"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6"
integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==
dependencies:
fs.realpath "^1.0.0"
inflight "^1.0.4"
inherits "2"
minimatch "^3.0.4"
once "^1.3.0"
path-is-absolute "^1.0.0"
https-proxy-agent@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz#702b71fb5520a132a66de1f67541d9e62154d82b"
integrity sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==
dependencies:
agent-base "5"
debug "4"
inflight@^1.0.4:
version "1.0.6"
resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=
dependencies:
once "^1.3.0"
wrappy "1"
inherits@2, inherits@^2.0.3, inherits@~2.0.3:
version "2.0.4"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
isarray@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=
languagedetect@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/languagedetect/-/languagedetect-2.0.0.tgz#4b8fa2b7593b2a3a02fb1100891041c53238936c"
integrity sha512-AZb/liiQ+6ZoTj4f1J0aE6OkzhCo8fyH+tuSaPfSo8YHCWLFJrdSixhtO2TYdIkjcDQNaR4RmGaV2A5FJklDMQ==
mime-db@1.44.0:
version "1.44.0"
resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.44.0.tgz#fa11c5eb0aca1334b4233cb4d52f10c5a6272f92"
integrity sha512-/NOTfLrsPBVeH7YtFPgsVWveuL+4SjjYxaQ1xtM1KMFj7HdxlBlxeyNLzhyJVx7r4rZGJAZ/6lkKCitSc/Nmpg==
mime-types@^2.1.25:
version "2.1.27"
resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.27.tgz#47949f98e279ea53119f5722e0f34e529bec009f"
integrity sha512-JIhqnCasI9yD+SsmkquHBxTSEuZdQX5BuQnS2Vc7puQQQ+8yiP5AY5uWhpdv4YL4VM5c6iliiYWPgJ/nJQLp7w==
dependencies:
mime-db "1.44.0"
mime@^2.0.3:
version "2.4.5"
resolved "https://registry.yarnpkg.com/mime/-/mime-2.4.5.tgz#d8de2ecb92982dedbb6541c9b6841d7f218ea009"
integrity sha512-3hQhEUF027BuxZjQA3s7rIv/7VCQPa27hN9u9g87sEkWaKwQPuXOkVKtOeiyUrnWqTDiOs8Ed2rwg733mB0R5w==
minimatch@^3.0.4:
version "3.0.4"
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
dependencies:
brace-expansion "^1.1.7"
minimist@^1.2.5:
version "1.2.5"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
mkdirp@^0.5.4:
version "0.5.5"
resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def"
integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==
dependencies:
minimist "^1.2.5"
ms@2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
integrity sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=
ms@^2.1.1:
version "2.1.2"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
once@^1.3.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E=
dependencies:
wrappy "1"
path-is-absolute@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
pend@~1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50"
integrity sha1-elfrVQpng/kRUzH89GY9XI4AelA=
process-nextick-args@~2.0.0:
version "2.0.1"
resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
progress@^2.0.1:
version "2.0.3"
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
proxy-from-env@^1.0.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==
puppeteer@^2.0.0:
version "2.1.1"
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-2.1.1.tgz#ccde47c2a688f131883b50f2d697bd25189da27e"
integrity sha512-LWzaDVQkk1EPiuYeTOj+CZRIjda4k2s5w4MK4xoH2+kgWV/SDlkYHmxatDdtYrciHUKSXTsGgPgPP8ILVdBsxg==
dependencies:
"@types/mime-types" "^2.1.0"
debug "^4.1.0"
extract-zip "^1.6.6"
https-proxy-agent "^4.0.0"
mime "^2.0.3"
mime-types "^2.1.25"
progress "^2.0.1"
proxy-from-env "^1.0.0"
rimraf "^2.6.1"
ws "^6.1.0"
readable-stream@^2.2.2:
version "2.3.7"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.7.tgz#1eca1cf711aef814c04f62252a36a62f6cb23b57"
integrity sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==
dependencies:
core-util-is "~1.0.0"
inherits "~2.0.3"
isarray "~1.0.0"
process-nextick-args "~2.0.0"
safe-buffer "~5.1.1"
string_decoder "~1.1.1"
util-deprecate "~1.0.1"
rimraf@^2.6.1:
version "2.7.1"
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==
dependencies:
glob "^7.1.3"
safe-buffer@~5.1.0, safe-buffer@~5.1.1:
version "5.1.2"
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
string_decoder@~1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==
dependencies:
safe-buffer "~5.1.0"
typedarray@^0.0.6:
version "0.0.6"
resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777"
integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=
util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=
wrappy@1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
ws@^6.1.0:
version "6.2.1"
resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.1.tgz#442fdf0a47ed64f59b6a5d8ff130f4748ed524fb"
integrity sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA==
dependencies:
async-limiter "~1.0.0"
yauzl@^2.10.0:
version "2.10.0"
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk=
dependencies:
buffer-crc32 "~0.2.3"
fd-slicer "~1.1.0"

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save