Fix NPM driver in Lambda

main
Elbert Alias 5 years ago
parent ebfef67608
commit e36ae46783

@ -1,19 +1,28 @@
const { URL } = require('url') const { URL } = require('url')
const fs = require('fs') const fs = require('fs')
const path = require('path')
const LanguageDetect = require('languagedetect') const LanguageDetect = require('languagedetect')
const Wappalyzer = require('./wappalyzer') const Wappalyzer = require('./wappalyzer')
const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env const { AWS_LAMBDA_FUNCTION_NAME, CHROMIUM_BIN } = process.env
let puppeteer let puppeteer
let chromiumArgs = [
'--no-sandbox',
'--headless',
'--disable-gpu',
'--ignore-certificate-errors'
]
let chromiumBin = CHROMIUM_BIN
if (AWS_LAMBDA_FUNCTION_NAME) { if (AWS_LAMBDA_FUNCTION_NAME) {
// eslint-disable-next-line global-require, import/no-unresolved const chromium = require('chrome-aws-lambda')
;({
chromium: { puppeteer } ;({ puppeteer } = chromium)
} = require('chrome-aws-lambda'))
chromiumArgs = chromiumArgs.concat(chromium.args)
chromiumBin = chromium.executablePath
} else { } else {
// eslint-disable-next-line global-require
puppeteer = require('puppeteer') puppeteer = require('puppeteer')
} }
@ -21,7 +30,7 @@ const languageDetect = new LanguageDetect()
languageDetect.setLanguageType('iso2') languageDetect.setLanguageType('iso2')
const json = JSON.parse(fs.readFileSync('./apps.json')) const json = JSON.parse(fs.readFileSync(path.resolve(`${__dirname}/apps.json`)))
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/
@ -154,13 +163,8 @@ class Driver {
try { try {
this.browser = await puppeteer.launch({ this.browser = await puppeteer.launch({
args: [ args: chromiumArgs,
'--no-sandbox', executablePath: await chromiumBin
'--headless',
'--disable-gpu',
'--ignore-certificate-errors'
],
executablePath: CHROMIUM_BIN
}) })
this.browser.on('disconnected', async () => { this.browser.on('disconnected', async () => {

@ -1,8 +1,19 @@
{ {
"name": "wappalyzer", "name": "wappalyzer",
"description": "Identify technology on websites", "description": "Identify technology on websites",
"keywords": [
"analyze",
"identify",
"detect",
"detector",
"technology",
"cms",
"framework",
"library",
"software"
],
"homepage": "https://www.wappalyzer.com", "homepage": "https://www.wappalyzer.com",
"version": "6.0.0", "version": "6.0.1",
"author": "Wappalyzer", "author": "Wappalyzer",
"license": "MIT", "license": "MIT",
"repository": { "repository": {

@ -1,722 +0,0 @@
/**
* Wappalyzer v5
*
* Created by Elbert Alias <elbert@alias.io>
*
* License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
*/
const validation = {
hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
}
/**
* Enclose string in array
*/
function asArray(value) {
return Array.isArray(value) ? value : [value]
}
/**
*
*/
function asyncForEach(iterable, iterator) {
return Promise.all(
(iterable || []).map(
(item) =>
new Promise((resolve) => setTimeout(() => resolve(iterator(item)), 1))
)
)
}
/**
* Mark application as detected, set confidence and version
*/
function addDetected(app, pattern, type, value, key) {
app.detected = true
// Set confidence level
app.confidence[`${type} ${key ? `${key} ` : ''}${pattern.regex}`] =
pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10)
// Detect version number
if (pattern.version) {
const versions = []
const matches = pattern.regex.exec(value)
let { version } = pattern
if (matches) {
matches.forEach((match, i) => {
// Parse ternary operator
const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version)
if (ternary && ternary.length === 3) {
version = version.replace(ternary[0], match ? ternary[1] : ternary[2])
}
// Replace back references
version = version
.trim()
.replace(new RegExp(`\\\\${i}`, 'g'), match || '')
})
if (version && !versions.includes(version)) {
versions.push(version)
}
if (versions.length) {
// Use the longest detected version number
app.version = versions.reduce((a, b) => (a.length > b.length ? a : b))
}
}
}
}
function resolveExcludes(apps, detected) {
const excludes = []
const detectedApps = Object.assign({}, apps, detected)
// Exclude app in detected apps only
Object.keys(detectedApps).forEach((appName) => {
const app = detectedApps[appName]
if (app.props.excludes) {
asArray(app.props.excludes).forEach((excluded) => {
excludes.push(excluded)
})
}
})
// Remove excluded applications
Object.keys(apps).forEach((appName) => {
if (excludes.includes(appName)) {
delete apps[appName]
}
})
}
class Application {
constructor(name, props, detected) {
this.confidence = {}
this.confidenceTotal = 0
this.detected = Boolean(detected)
this.excludes = []
this.name = name
this.props = props
this.version = ''
}
/**
* Calculate confidence total
*/
getConfidence() {
let total = 0
Object.keys(this.confidence).forEach((id) => {
total += this.confidence[id]
})
this.confidenceTotal = Math.min(total, 100)
return this.confidenceTotal
}
}
class Wappalyzer {
constructor() {
this.apps = {}
this.categories = {}
this.driver = {}
this.jsPatterns = {}
this.detected = {}
this.hostnameCache = {}
this.adCache = []
this.config = {
websiteURL: 'https://www.wappalyzer.com/',
twitterURL: 'https://twitter.com/Wappalyzer',
githubURL: 'https://github.com/AliasIO/Wappalyzer'
}
}
/**
* Log messages to console
*/
log(message, source, type) {
if (this.driver.log) {
this.driver.log(message, source || '', type || 'debug')
}
}
analyze(url, data, context) {
const apps = {}
const promises = []
const startTime = new Date()
const { scripts, cookies, headers, js } = data
let { html } = data
if (this.detected[url.canonical] === undefined) {
this.detected[url.canonical] = {}
}
const metaTags = []
// Additional information
let language = null
if (html) {
if (typeof html !== 'string') {
html = ''
}
let matches = data.html.match(
new RegExp('<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i')
)
language = matches && matches.length ? matches[1] : data.language || null
// Meta tags
const regex = /<meta[^>]+>/gi
do {
matches = regex.exec(html)
if (!matches) {
break
}
metaTags.push(matches[0])
} while (matches)
}
Object.keys(this.apps).forEach((appName) => {
apps[appName] =
this.detected[url.canonical] && this.detected[url.canonical][appName]
? this.detected[url.canonical][appName]
: new Application(appName, this.apps[appName])
const app = apps[appName]
promises.push(this.analyzeUrl(app, url))
if (html) {
promises.push(this.analyzeHtml(app, html))
promises.push(this.analyzeMeta(app, metaTags))
}
if (scripts) {
promises.push(this.analyzeScripts(app, scripts))
}
if (cookies) {
promises.push(this.analyzeCookies(app, cookies))
}
if (headers) {
promises.push(this.analyzeHeaders(app, headers))
}
})
if (js) {
Object.keys(js).forEach((appName) => {
if (typeof js[appName] !== 'function') {
promises.push(this.analyzeJs(apps[appName], js[appName]))
}
})
}
return new Promise(async (resolve) => {
await Promise.all(promises)
Object.keys(apps).forEach((appName) => {
const app = apps[appName]
if (!app.detected || !app.getConfidence()) {
delete apps[app.name]
}
})
resolveExcludes(apps, this.detected[url])
this.resolveImplies(apps, url.canonical)
this.cacheDetectedApps(apps, url.canonical)
this.trackDetectedApps(apps, url, language)
this.log(
`Processing ${Object.keys(data).join(', ')} took ${(
(new Date() - startTime) /
1000
).toFixed(2)}s (${url.hostname})`,
'core'
)
if (Object.keys(apps).length) {
this.log(
`Identified ${Object.keys(apps).join(', ')} (${url.hostname})`,
'core'
)
}
this.driver.displayApps(
this.detected[url.canonical],
{ language },
context
)
return resolve()
})
}
/**
* Cache detected ads
*/
cacheDetectedAds(ad) {
this.adCache.push(ad)
}
/**
*
*/
robotsTxtAllows(url) {
return new Promise(async (resolve, reject) => {
const parsed = this.parseUrl(url)
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
return reject()
}
const robotsTxt = await this.driver.getRobotsTxt(
parsed.host,
parsed.protocol === 'https:'
)
if (
robotsTxt.some(
(disallowedPath) => parsed.pathname.indexOf(disallowedPath) === 0
)
) {
return reject()
}
return resolve()
})
}
/**
* Parse a URL
*/
parseUrl(url) {
const a = this.driver.document.createElement('a')
a.href = url
a.canonical = `${a.protocol}//${a.host}${a.pathname}`
return a
}
/**
*
*/
static parseRobotsTxt(robotsTxt) {
const disallow = []
let userAgent
robotsTxt.split('\n').forEach((line) => {
let matches = /^User-agent:\s*(.+)$/i.exec(line.trim())
if (matches) {
userAgent = matches[1].toLowerCase()
} else if (userAgent === '*' || userAgent === 'wappalyzer') {
matches = /^Disallow:\s*(.+)$/i.exec(line.trim())
if (matches) {
disallow.push(matches[1])
}
}
})
return disallow
}
/**
*
*/
ping() {
if (Object.keys(this.hostnameCache).length > 50) {
this.driver.ping(this.hostnameCache)
this.hostnameCache = {}
}
if (this.adCache.length > 50) {
this.driver.ping({}, this.adCache)
this.adCache = []
}
}
/**
* Parse apps.json patterns
*/
parsePatterns(patterns) {
if (!patterns) {
return []
}
let parsed = {}
// Convert string to object containing array containing string
if (typeof patterns === 'string' || Array.isArray(patterns)) {
patterns = {
main: asArray(patterns)
}
}
Object.keys(patterns).forEach((key) => {
parsed[key] = []
asArray(patterns[key]).forEach((pattern) => {
const attrs = {}
pattern.split('\\;').forEach((attr, i) => {
if (i) {
// Key value pairs
attr = attr.split(':')
if (attr.length > 1) {
attrs[attr.shift()] = attr.join(':')
}
} else {
attrs.string = attr
try {
attrs.regex = new RegExp(attr.replace('/', '/'), 'i') // Escape slashes in regular expression
} catch (error) {
attrs.regex = new RegExp()
this.log(`${error.message}: ${attr}`, 'error', 'core')
}
}
})
parsed[key].push(attrs)
})
})
// Convert back to array if the original pattern list was an array (or string)
if ('main' in parsed) {
parsed = parsed.main
}
return parsed
}
/**
* Parse JavaScript patterns
*/
parseJsPatterns() {
Object.keys(this.apps).forEach((appName) => {
if (this.apps[appName].js) {
this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js)
}
})
}
resolveImplies(apps, url) {
let checkImplies = true
const resolve = (appName) => {
const app = apps[appName]
if (app && app.props.implies) {
asArray(app.props.implies).forEach((implied) => {
;[implied] = this.parsePatterns(implied)
if (!this.apps[implied.string]) {
this.log(
`Implied application ${implied.string} does not exist`,
'core',
'warn'
)
return
}
if (!(implied.string in apps)) {
apps[implied.string] =
this.detected[url] && this.detected[url][implied.string]
? this.detected[url][implied.string]
: new Application(
implied.string,
this.apps[implied.string],
true
)
checkImplies = true
}
// Apply app confidence to implied app
Object.keys(app.confidence).forEach((id) => {
apps[implied.string].confidence[`${id} implied by ${appName}`] =
app.confidence[id] *
(implied.confidence === undefined ? 1 : implied.confidence / 100)
})
})
}
}
// Implied applications
// Run several passes as implied apps may imply other apps
while (checkImplies) {
checkImplies = false
Object.keys(apps).forEach(resolve)
}
}
/**
* Cache detected applications
*/
cacheDetectedApps(apps, url) {
Object.keys(apps).forEach((appName) => {
const app = apps[appName]
// Per URL
this.detected[url][appName] = app
Object.keys(app.confidence).forEach((id) => {
this.detected[url][appName].confidence[id] = app.confidence[id]
})
})
if (this.driver.ping instanceof Function) {
this.ping()
}
}
/**
* Track detected applications
*/
trackDetectedApps(apps, url, language) {
if (!(this.driver.ping instanceof Function)) {
return
}
const hostname = `${url.protocol}//${url.hostname}`
Object.keys(apps).forEach((appName) => {
const app = apps[appName]
if (this.detected[url.canonical][appName].getConfidence() >= 100) {
if (
validation.hostname.test(url.hostname) &&
!validation.hostnameBlacklist.test(url.hostname)
) {
if (!(hostname in this.hostnameCache)) {
this.hostnameCache[hostname] = {
applications: {},
meta: {}
}
}
if (!(appName in this.hostnameCache[hostname].applications)) {
this.hostnameCache[hostname].applications[appName] = {
hits: 0
}
}
this.hostnameCache[hostname].applications[appName].hits += 1
if (apps[appName].version) {
this.hostnameCache[hostname].applications[appName].version =
app.version
}
}
}
})
if (hostname in this.hostnameCache) {
this.hostnameCache[hostname].meta.language = language
}
this.ping()
}
/**
* Analyze URL
*/
analyzeUrl(app, url) {
const patterns = this.parsePatterns(app.props.url)
if (!patterns.length) {
return Promise.resolve()
}
return asyncForEach(patterns, (pattern) => {
if (pattern.regex.test(url.canonical)) {
addDetected(app, pattern, 'url', url.canonical)
}
})
}
/**
* Analyze HTML
*/
analyzeHtml(app, html) {
const patterns = this.parsePatterns(app.props.html)
if (!patterns.length) {
return Promise.resolve()
}
return asyncForEach(patterns, (pattern) => {
if (pattern.regex.test(html)) {
addDetected(app, pattern, 'html', html)
}
})
}
/**
* Analyze script tag
*/
analyzeScripts(app, scripts) {
const patterns = this.parsePatterns(app.props.script)
if (!patterns.length) {
return Promise.resolve()
}
return asyncForEach(patterns, (pattern) => {
scripts.forEach((uri) => {
if (pattern.regex.test(uri)) {
addDetected(app, pattern, 'script', uri)
}
})
})
}
/**
* Analyze meta tag
*/
analyzeMeta(app, metaTags) {
const patterns = this.parsePatterns(app.props.meta)
const promises = []
if (!app.props.meta) {
return Promise.resolve()
}
metaTags.forEach((match) => {
Object.keys(patterns).forEach((meta) => {
const r = new RegExp(`(?:name|property)=["']${meta}["']`, 'i')
if (r.test(match)) {
const content = match.match(/content=("|')([^"']+)("|')/i)
promises.push(
asyncForEach(patterns[meta], (pattern) => {
if (
content &&
content.length === 4 &&
pattern.regex.test(content[2])
) {
addDetected(app, pattern, 'meta', content[2], meta)
}
})
)
}
})
})
return Promise.all(promises)
}
/**
* Analyze response headers
*/
analyzeHeaders(app, headers) {
const patterns = this.parsePatterns(app.props.headers)
const promises = []
Object.keys(patterns).forEach((headerName) => {
if (typeof patterns[headerName] !== 'function') {
promises.push(
asyncForEach(patterns[headerName], (pattern) => {
headerName = headerName.toLowerCase()
if (headerName in headers) {
headers[headerName].forEach((headerValue) => {
if (pattern.regex.test(headerValue)) {
addDetected(app, pattern, 'headers', headerValue, headerName)
}
})
}
})
)
}
})
return promises ? Promise.all(promises) : Promise.resolve()
}
/**
* Analyze cookies
*/
analyzeCookies(app, cookies) {
const patterns = this.parsePatterns(app.props.cookies)
const promises = []
Object.keys(patterns).forEach((cookieName) => {
if (typeof patterns[cookieName] !== 'function') {
const cookieNameLower = cookieName.toLowerCase()
promises.push(
asyncForEach(patterns[cookieName], (pattern) => {
const cookie = cookies.find(
(_cookie) => _cookie.name.toLowerCase() === cookieNameLower
)
if (cookie && pattern.regex.test(cookie.value)) {
addDetected(app, pattern, 'cookies', cookie.value, cookieName)
}
})
)
}
})
return promises ? Promise.all(promises) : Promise.resolve()
}
/**
* Analyze JavaScript variables
*/
analyzeJs(app, results) {
const promises = []
Object.keys(results).forEach((string) => {
if (typeof results[string] !== 'function') {
promises.push(
asyncForEach(Object.keys(results[string]), (index) => {
const pattern = this.jsPatterns[app.name][string][index]
const value = results[string][index]
if (pattern && pattern.regex.test(value)) {
addDetected(app, pattern, 'js', value, string)
}
})
)
}
})
return promises ? Promise.all(promises) : Promise.resolve()
}
}
if (typeof module === 'object') {
module.exports = Wappalyzer
}