You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

736 lines
17 KiB

/**
 * Wappalyzer v5
 *
 * Created by Elbert Alias <elbert@alias.io>
 *
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
 */
/**
 * Hostname checks applied before a detection is recorded in the hostname
 * cache (see Wappalyzer#trackDetectedApps).
 *
 * - hostname: the string must end in `<name>.<tld>` where the final label is
 *   2-6 letters, optionally preceded by a 2-3 letter second-level label
 *   (e.g. `co.uk`). An optional literal `www.` prefix is captured separately.
 * - hostnameBlacklist: matches hostnames containing labels such as `local.`,
 *   `dev.`, `staging.`, `test.`, `demo.`, `admin.`, `google.`, `cache.`, the
 *   path fragment `/admin`, or the suffix `.local`.
 */
const validation = {
  // The dot after `www` is escaped so the prefix group only matches a literal "www."
  hostname: /(www\.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
}
7 years ago
6 years ago
/**
 * Enclose a value in an array unless it already is one
 *
 * @param {*} value - Any value
 * @returns {Array} `value` itself when it is an array, otherwise `[value]`
 */
function asArray(value) {
  if (Array.isArray(value)) {
    return value
  }

  return [value]
}
/**
 * Invoke `iterator` for every item, each call deferred by a 1 ms timer
 *
 * @param {Array} [iterable] - Items to visit; a falsy value is treated as an empty list
 * @param {Function} iterator - Called with each item from inside the timer callback
 * @returns {Promise<Array>} Resolves with the iterator results, in item order
 */
function asyncForEach(iterable, iterator) {
  const items = iterable || []

  const deferred = items.map(
    (item) =>
      new Promise((resolve) => {
        setTimeout(() => resolve(iterator(item)), 1)
      })
  )

  return Promise.all(deferred)
}
/**
* Mark application as detected, set confidence and version
*/
function addDetected(app, pattern, type, value, key) {
app.detected = true
6 years ago
// Set confidence level
app.confidence[`${type} ${key ? `${key} ` : ''}${pattern.regex}`] =
pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10)
6 years ago
// Detect version number
if (pattern.version) {
const versions = []
const matches = pattern.regex.exec(value)
6 years ago
let { version } = pattern
6 years ago
if (matches) {
matches.forEach((match, i) => {
// Parse ternary operator
const ternary = new RegExp(`\\\\${i}\\?([^:]+):(.*)$`).exec(version)
6 years ago
if (ternary && ternary.length === 3) {
version = version.replace(ternary[0], match ? ternary[1] : ternary[2])
6 years ago
}
// Replace back references
version = version
.trim()
.replace(new RegExp(`\\\\${i}`, 'g'), match || '')
})
6 years ago
if (version && !versions.includes(version)) {
versions.push(version)
6 years ago
}
if (versions.length) {
// Use the longest detected version number
app.version = versions.reduce((a, b) => (a.length > b.length ? a : b))
6 years ago
}
}
}
}
/**
 * Remove applications that are excluded by another application
 *
 * The `excludes` property is read from the props of every app in `apps` and
 * in `detected`; any app in `apps` whose name appears in that list is deleted.
 *
 * @param {Object} apps - Map of app name to app; mutated in place
 * @param {Object} [detected] - Optional map of further apps whose excludes also apply
 */
function resolveExcludes(apps, detected) {
  const detectedApps = Object.assign({}, apps, detected)
  const excludes = new Set()

  // Collect the names excluded by any of the candidate or detected apps
  Object.keys(detectedApps).forEach((appName) => {
    const { props } = detectedApps[appName]

    if (props.excludes) {
      asArray(props.excludes).forEach((excluded) => excludes.add(excluded))
    }
  })

  // Remove excluded applications
  Object.keys(apps).forEach((appName) => {
    if (excludes.has(appName)) {
      delete apps[appName]
    }
  })
}
/**
 * A single application together with its detection state
 */
class Application {
  /**
   * @param {string} name - Application name
   * @param {Object} props - Application properties (pattern definitions etc.)
   * @param {*} [detected] - Coerced to a boolean initial `detected` state
   */
  constructor(name, props, detected) {
    this.confidence = {}
    this.confidenceTotal = 0
    this.detected = Boolean(detected)
    this.excludes = []
    this.name = name
    this.props = props
    this.version = ''
  }

  /**
   * Calculate confidence total
   *
   * Sums every value in `confidence`, caps the sum at 100 and stores it in
   * `confidenceTotal`.
   *
   * @returns {number} The capped total
   */
  getConfidence() {
    const total = Object.values(this.confidence).reduce(
      (sum, value) => sum + value,
      0
    )

    this.confidenceTotal = Math.min(total, 100)

    return this.confidenceTotal
  }
}
/**
 * Detection engine: matches application patterns against a page's URL, HTML,
 * meta tags, scripts, cookies, headers and JavaScript results, and reports
 * the outcome through the `driver` object supplied by the host environment.
 */
class Wappalyzer {
  constructor() {
    this.apps = {}
    this.categories = {}
    this.driver = {}
    this.jsPatterns = {}
    this.detected = {}
    this.hostnameCache = {
      expires: Date.now() + 1000 * 60 * 60 * 24,
      hostnames: {}
    }
    this.adCache = []
    this.config = {
      websiteURL: 'https://www.wappalyzer.com/',
      twitterURL: 'https://twitter.com/Wappalyzer',
      githubURL: 'https://github.com/AliasIO/Wappalyzer'
    }
  }

  /**
   * Log messages to console
   *
   * Forwards to `driver.log` when the driver provides it.
   *
   * @param {string} message
   * @param {string} [source] - Defaults to ''
   * @param {string} [type] - Defaults to 'debug'
   */
  log(message, source, type) {
    if (this.driver.log) {
      this.driver.log(message, source || '', type || 'debug')
    }
  }

  /**
   * Analyze the collected page data against every known application
   *
   * @param {Object} url - Parsed URL; `canonical`, `hostname` and `protocol` are read
   * @param {Object} data - Any of `html`, `scripts`, `cookies`, `headers`, `js`, `language`
   * @param {*} context - Passed through unchanged to `driver.displayApps`
   * @returns {Promise<undefined>} Settles after the results were handed to the driver;
   *   rejects if any analysis step or the driver throws
   */
  analyze(url, data, context) {
    const apps = {}
    const promises = []
    const startTime = new Date()
    const { scripts, cookies, headers, js } = data
    let { html } = data

    if (this.detected[url.canonical] === undefined) {
      this.detected[url.canonical] = {}
    }

    let metaTags = []

    // Additional information
    let language = null

    if (html) {
      if (typeof html !== 'string') {
        html = ''
      }

      // Match against the sanitised local value, not data.html, which may not be a string
      const langMatch = html.match(
        new RegExp('<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i')
      )

      language =
        langMatch && langMatch.length ? langMatch[1] : data.language || null

      // Meta tags
      metaTags = html.match(/<meta[^>]+>/gi) || []
    }

    Object.keys(this.apps).forEach((appName) => {
      // Reuse the cached application for this URL so confidence accumulates
      apps[appName] =
        this.detected[url.canonical] && this.detected[url.canonical][appName]
          ? this.detected[url.canonical][appName]
          : new Application(appName, this.apps[appName])

      const app = apps[appName]

      promises.push(this.analyzeUrl(app, url))

      if (html) {
        promises.push(this.analyzeHtml(app, html))
        promises.push(this.analyzeMeta(app, metaTags))
      }

      if (scripts) {
        promises.push(this.analyzeScripts(app, scripts))
      }

      if (cookies) {
        promises.push(this.analyzeCookies(app, cookies))
      }

      if (headers) {
        promises.push(this.analyzeHeaders(app, headers))
      }
    })

    if (js) {
      Object.keys(js).forEach((appName) => {
        // Skip results for applications that are not known to this instance
        if (apps[appName] && typeof js[appName] !== 'function') {
          promises.push(this.analyzeJs(apps[appName], js[appName]))
        }
      })
    }

    // Chain on the analysis promises directly so that a failure rejects the
    // returned promise instead of leaving it pending
    return Promise.all(promises).then(() => {
      Object.keys(apps).forEach((appName) => {
        const app = apps[appName]

        if (!app.detected || !app.getConfidence()) {
          delete apps[app.name]
        }
      })

      resolveExcludes(apps, this.detected[url.canonical])
      this.resolveImplies(apps, url.canonical)

      this.cacheDetectedApps(apps, url.canonical)
      this.trackDetectedApps(apps, url, language)

      this.log(
        `Processing ${Object.keys(data).join(', ')} took ${(
          (new Date() - startTime) /
          1000
        ).toFixed(2)}s (${url.hostname})`,
        'core'
      )

      if (Object.keys(apps).length) {
        this.log(
          `Identified ${Object.keys(apps).join(', ')} (${url.hostname})`,
          'core'
        )
      }

      this.driver.displayApps(
        this.detected[url.canonical],
        { language },
        context
      )
    })
  }

  /**
   * Cache detected ads
   *
   * @param {*} ad - Appended to `adCache`
   */
  cacheDetectedAds(ad) {
    this.adCache.push(ad)
  }

  /**
   * Check a URL against the disallowed paths returned by `driver.getRobotsTxt`
   *
   * @param {string} url
   * @returns {Promise<undefined>} Resolves when allowed. Rejects without a reason
   *   when the protocol is not http(s) or the path starts with a disallowed
   *   path; rejects with the underlying error if the driver fails.
   */
  async robotsTxtAllows(url) {
    const parsed = this.parseUrl(url)

    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return Promise.reject()
    }

    const robotsTxt = await this.driver.getRobotsTxt(
      parsed.host,
      parsed.protocol === 'https:'
    )

    const disallowed = robotsTxt.some((disallowedPath) =>
      parsed.pathname.startsWith(disallowedPath)
    )

    if (disallowed) {
      return Promise.reject()
    }

    return undefined
  }

  /**
   * Parse a URL
   *
   * Uses an anchor element created through `driver.document`.
   *
   * @param {string} url
   * @returns {Object} The anchor element, with an added `canonical` property
   *   (`protocol//host` followed by the pathname)
   */
  parseUrl(url) {
    const a = this.driver.document.createElement('a')

    a.href = url
    a.canonical = `${a.protocol}//${a.host}${a.pathname}`

    return a
  }

  /**
   * Extract the Disallow paths that apply to the `*` or `wappalyzer` user agents
   *
   * @param {string} robotsTxt - Contents of a robots.txt file
   * @returns {string[]} Disallowed paths in file order
   */
  static parseRobotsTxt(robotsTxt) {
    const disallow = []
    let userAgent

    robotsTxt.split('\n').forEach((rawLine) => {
      const line = rawLine.trim()
      const agentMatch = /^User-agent:\s*(.+)$/i.exec(line)

      if (agentMatch) {
        userAgent = agentMatch[1].toLowerCase()

        return
      }

      if (userAgent === '*' || userAgent === 'wappalyzer') {
        const disallowMatch = /^Disallow:\s*(.+)$/i.exec(line)

        if (disallowMatch) {
          disallow.push(disallowMatch[1])
        }
      }
    })

    return disallow
  }

  /**
   * Hand the hostname cache and the ad cache to `driver.ping` and reset them
   *
   * The hostname cache is sent when it has no hostnames object, holds more
   * than 50 hostnames or has expired; the ad cache when it holds more than
   * 50 entries.
   */
  ping() {
    if (
      !this.hostnameCache.hostnames ||
      Object.keys(this.hostnameCache.hostnames).length > 50 ||
      this.hostnameCache.expires < Date.now()
    ) {
      this.driver.ping(this.hostnameCache)

      this.hostnameCache = {
        expires: Date.now() + 1000 * 60 * 60 * 24,
        hostnames: {}
      }
    }

    if (this.adCache.length > 50) {
      this.driver.ping({}, this.adCache)

      this.adCache = []
    }
  }

  /**
   * Parse apps.json patterns
   *
   * Each pattern string is split on the two-character sequence `\;`. The first
   * part is the regular expression source; the remaining parts are
   * `key:value` attributes (e.g. `version`, `confidence`).
   *
   * @param {string|string[]|Object} patterns - A pattern, a list of patterns,
   *   or an object mapping keys to a pattern or list of patterns
   * @returns {Array|Object} `[]` for falsy input; an array of parsed patterns
   *   for string/array input; otherwise an object mapping each key to an
   *   array of parsed patterns. A parsed pattern has `string`, `regex` and
   *   any attributes found.
   */
  parsePatterns(patterns) {
    if (!patterns) {
      return []
    }

    // A string or array is a plain list; remember that so the result can be
    // converted back, independent of the key names of a keyed pattern object
    const isList = typeof patterns === 'string' || Array.isArray(patterns)
    const groups = isList ? { main: asArray(patterns) } : patterns
    const parsed = {}

    Object.keys(groups).forEach((key) => {
      parsed[key] = asArray(groups[key]).map((pattern) => {
        const attrs = {}

        pattern.split('\\;').forEach((attr, i) => {
          if (i) {
            // Key value pairs; the value may itself contain colons
            const [name, ...rest] = attr.split(':')

            if (rest.length) {
              attrs[name] = rest.join(':')
            }

            return
          }

          attrs.string = attr

          try {
            // Slashes need no escaping when a RegExp is built from a string
            attrs.regex = new RegExp(attr, 'i')
          } catch (error) {
            // Fall back to an empty expression so later `.test` calls still work
            attrs.regex = new RegExp()

            this.log(`${error.message}: ${attr}`, 'core', 'error')
          }
        })

        return attrs
      })
    })

    // Convert back to array if the original pattern list was an array (or string)
    return isList ? parsed.main : parsed
  }

  /**
   * Parse JavaScript patterns
   *
   * Fills `jsPatterns` for every app that has a `js` property.
   */
  parseJsPatterns() {
    Object.keys(this.apps).forEach((appName) => {
      if (this.apps[appName].js) {
        this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js)
      }
    })
  }

  /**
   * Add the applications implied by the detected ones
   *
   * @param {Object} apps - Map of detected apps; implied apps are added in place
   * @param {string} url - Key into `detected` used to reuse cached applications
   */
  resolveImplies(apps, url) {
    let checkImplies = true

    const resolve = (appName) => {
      const app = apps[appName]

      if (!app || !app.props.implies) {
        return
      }

      asArray(app.props.implies).forEach((impliedPattern) => {
        const [implied] = this.parsePatterns(impliedPattern)

        if (!this.apps[implied.string]) {
          this.log(
            `Implied application ${implied.string} does not exist`,
            'core',
            'warn'
          )

          return
        }

        if (!(implied.string in apps)) {
          apps[implied.string] =
            this.detected[url] && this.detected[url][implied.string]
              ? this.detected[url][implied.string]
              : new Application(
                  implied.string,
                  this.apps[implied.string],
                  true
                )

          // A newly added app may imply further apps
          checkImplies = true
        }

        // Apply app confidence to implied app
        Object.keys(app.confidence).forEach((id) => {
          apps[implied.string].confidence[`${id} implied by ${appName}`] =
            app.confidence[id] *
            (implied.confidence === undefined ? 1 : implied.confidence / 100)
        })
      })
    }

    // Implied applications
    // Run several passes as implied apps may imply other apps
    while (checkImplies) {
      checkImplies = false

      Object.keys(apps).forEach(resolve)
    }
  }

  /**
   * Cache detected applications
   *
   * @param {Object} apps - Map of detected apps
   * @param {string} url - Key under which the apps are stored in `detected`
   */
  cacheDetectedApps(apps, url) {
    if (this.detected[url] === undefined) {
      this.detected[url] = {}
    }

    Object.keys(apps).forEach((appName) => {
      // Per URL; the same object is stored, so its confidence map is shared
      this.detected[url][appName] = apps[appName]
    })

    if (this.driver.ping instanceof Function) {
      this.ping()
    }
  }

  /**
   * Track detected applications
   *
   * Does nothing unless the driver provides `ping`. Apps whose confidence
   * reaches 100 are counted in the hostname cache, provided the hostname
   * passes `validation.hostname` and is not matched by
   * `validation.hostnameBlacklist`.
   *
   * @param {Object} apps - Map of detected apps
   * @param {Object} url - Parsed URL; `protocol`, `hostname` and `canonical` are read
   * @param {string|null} language - Stored as hostname meta data
   */
  trackDetectedApps(apps, url, language) {
    if (!(this.driver.ping instanceof Function)) {
      return
    }

    const hostname = `${url.protocol}//${url.hostname}`
    const { hostnames } = this.hostnameCache

    Object.keys(apps).forEach((appName) => {
      const app = apps[appName]

      if (this.detected[url.canonical][appName].getConfidence() < 100) {
        return
      }

      if (
        !validation.hostname.test(url.hostname) ||
        validation.hostnameBlacklist.test(url.hostname)
      ) {
        return
      }

      if (!(hostname in hostnames)) {
        hostnames[hostname] = {
          applications: {},
          meta: {}
        }
      }

      const { applications } = hostnames[hostname]

      if (!(appName in applications)) {
        applications[appName] = {
          hits: 0
        }
      }

      applications[appName].hits += 1

      if (app.version) {
        applications[appName].version = app.version
      }
    })

    if (hostname in hostnames) {
      hostnames[hostname].meta.language = language
    }

    this.ping()
  }

  /**
   * Analyze URL
   *
   * @param {Object} app - Application to test; `props.url` holds the patterns
   * @param {Object} url - Parsed URL; `canonical` is matched
   * @returns {Promise}
   */
  analyzeUrl(app, url) {
    const patterns = this.parsePatterns(app.props.url)

    if (!patterns.length) {
      return Promise.resolve()
    }

    return asyncForEach(patterns, (pattern) => {
      if (pattern.regex.test(url.canonical)) {
        addDetected(app, pattern, 'url', url.canonical)
      }
    })
  }

  /**
   * Analyze HTML
   *
   * @param {Object} app - Application to test; `props.html` holds the patterns
   * @param {string} html
   * @returns {Promise}
   */
  analyzeHtml(app, html) {
    const patterns = this.parsePatterns(app.props.html)

    if (!patterns.length) {
      return Promise.resolve()
    }

    return asyncForEach(patterns, (pattern) => {
      if (pattern.regex.test(html)) {
        addDetected(app, pattern, 'html', html)
      }
    })
  }

  /**
   * Analyze script tag
   *
   * @param {Object} app - Application to test; `props.script` holds the patterns
   * @param {string[]} scripts - Script URIs
   * @returns {Promise}
   */
  analyzeScripts(app, scripts) {
    const patterns = this.parsePatterns(app.props.script)

    if (!patterns.length) {
      return Promise.resolve()
    }

    return asyncForEach(patterns, (pattern) => {
      scripts.forEach((uri) => {
        if (pattern.regex.test(uri)) {
          addDetected(app, pattern, 'script', uri)
        }
      })
    })
  }

  /**
   * Analyze meta tag
   *
   * @param {Object} app - Application to test; `props.meta` maps meta names to patterns
   * @param {string[]} metaTags - Raw `<meta ...>` tags
   * @returns {Promise}
   */
  analyzeMeta(app, metaTags) {
    if (!app.props.meta) {
      return Promise.resolve()
    }

    const patterns = this.parsePatterns(app.props.meta)
    const promises = []

    metaTags.forEach((tag) => {
      Object.keys(patterns).forEach((meta) => {
        const nameRegex = new RegExp(`(?:name|property)=["']${meta}["']`, 'i')

        if (!nameRegex.test(tag)) {
          return
        }

        const content = tag.match(/content=("|')([^"']+)("|')/i)

        promises.push(
          asyncForEach(patterns[meta], (pattern) => {
            if (
              content &&
              content.length === 4 &&
              pattern.regex.test(content[2])
            ) {
              addDetected(app, pattern, 'meta', content[2], meta)
            }
          })
        )
      })
    })

    return Promise.all(promises)
  }

  /**
   * Analyze response headers
   *
   * @param {Object} app - Application to test; `props.headers` maps header names to patterns
   * @param {Object} headers - Maps lower-case header names to arrays of values
   * @returns {Promise}
   */
  analyzeHeaders(app, headers) {
    const patterns = this.parsePatterns(app.props.headers)
    const promises = []

    Object.keys(patterns).forEach((headerName) => {
      if (typeof patterns[headerName] === 'function') {
        return
      }

      // Pattern keys may use any case; look the header up in lower case
      const name = headerName.toLowerCase()

      promises.push(
        asyncForEach(patterns[headerName], (pattern) => {
          if (name in headers) {
            headers[name].forEach((headerValue) => {
              if (pattern.regex.test(headerValue)) {
                addDetected(app, pattern, 'headers', headerValue, name)
              }
            })
          }
        })
      )
    })

    return Promise.all(promises)
  }

  /**
   * Analyze cookies
   *
   * @param {Object} app - Application to test; `props.cookies` maps cookie names to patterns
   * @param {Object[]} cookies - Cookies with `name` and `value`
   * @returns {Promise}
   */
  analyzeCookies(app, cookies) {
    const patterns = this.parsePatterns(app.props.cookies)
    const promises = []

    Object.keys(patterns).forEach((cookieName) => {
      if (typeof patterns[cookieName] === 'function') {
        return
      }

      const cookieNameLower = cookieName.toLowerCase()

      promises.push(
        asyncForEach(patterns[cookieName], (pattern) => {
          // Cookie names are compared case-insensitively
          const cookie = cookies.find(
            (_cookie) => _cookie.name.toLowerCase() === cookieNameLower
          )

          if (cookie && pattern.regex.test(cookie.value)) {
            addDetected(app, pattern, 'cookies', cookie.value, cookieName)
          }
        })
      )
    })

    return Promise.all(promises)
  }

  /**
   * Analyze JavaScript variables
   *
   * @param {Object} app - Application to test; its patterns are read from `jsPatterns`
   * @param {Object} results - Maps a pattern key to an object of `index: value`
   *   pairs, where `index` selects the pattern in `jsPatterns[app.name][key]`
   * @returns {Promise}
   */
  analyzeJs(app, results) {
    const promises = []
    // Tolerate results for apps whose JS patterns were never parsed
    const appPatterns = this.jsPatterns[app.name] || {}

    Object.keys(results).forEach((string) => {
      if (typeof results[string] === 'function') {
        return
      }

      promises.push(
        asyncForEach(Object.keys(results[string]), (index) => {
          const pattern = (appPatterns[string] || [])[index]
          const value = results[string][index]

          if (pattern && pattern.regex.test(value)) {
            addDetected(app, pattern, 'js', value, string)
          }
        })
      )
    })

    return Promise.all(promises)
  }
}
6 years ago
// Expose the class through CommonJS when a `module` object is present;
// the assignment is skipped where `module` is not defined.
if (typeof module === 'object') {
module.exports = Wappalyzer
}