From 5ed98d3d243c9c7588ddc86327dd4c456049900f Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Wed, 14 Oct 2020 18:36:09 +1100 Subject: [PATCH] Update README --- src/README.md | 435 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 409 insertions(+), 26 deletions(-) diff --git a/src/README.md b/src/README.md index 6ffff3ef9..1d8d23a1c 100644 --- a/src/README.md +++ b/src/README.md @@ -1,40 +1,423 @@ -# Wappalyzer core +# Wappalyzer [![Travis](https://travis-ci.org/aliasio/wappalyzer.svg?branch=master)](https://travis-ci.org/aliasio/wappalyzer/) -[Wappalyzer](https://www.wappalyzer.com/) indentifies technologies on websites. +[Wappalyzer](https://www.wappalyzer.com) identifies technologies on websites, including content management systems, ecommerce platforms, JavaScript frameworks, analytics tools and [much more](https://www.wappalyzer.com/technologies). -## Installation +* [wappalyzer on NPM](https://www.npmjs.com/package/wappalyzer) +* [wappalyzer-core on NPM](https://www.npmjs.com/package/wappalyzer-core) +* [Chrome extension](https://chrome.google.com/webstore/detail/wappalyzer/gppongmhjkpfnbhagpmjfkannfbllamg) +* [Firefox add-on](https://addons.mozilla.org/en-US/firefox/addon/wappalyzer/) +* [Edge extension](https://microsoftedge.microsoft.com/addons/detail/mnbndgmknlpdjdnjfmfcdjoegcckoikn) +* [Wappalyzer REST APIs](https://www.wappalyzer.com/api/) -```shell -$ npm i wappalyzer-core +## Prerequisites + +- [Git](https://git-scm.com) +- [Node.js](https://nodejs.org) version 12 or higher +- [Yarn](https://yarnpkg.com) + +## Quick start + +```sh +git clone https://github.com/aliasio/wappalyzer +cd wappalyzer +yarn install +yarn run link ``` ## Usage -```javascript -#!/usr/bin/env node +### Command line + +```sh +node src/drivers/npm/cli.js https://example.com +``` + +### Chrome extension + +* Go to `about:extensions` +* Enable 'Developer mode' +* Click 'Load unpacked' +* Select `src/drivers/webextension` + 
+### Firefox extension -const fs = require('fs') -const Wappalyzer = require('./wappalyzer') +* Go to `about:debugging#/runtime/this-firefox` +* Click 'Load Temporary Add-on' +* Select `src/drivers/webextension/manifest.json` -// See https://www.wappalyzer.com/docs/dev/specification or use -// https://raw.githubusercontent.com/AliasIO/wappalyzer/master/src/technologies.json -const { technologies, categories } = JSON.parse( - fs.readFileSync('./technologies.json') -) +## Specification -Wappalyzer.setTechnologies(technologies) -Wappalyzer.setCategories(categories) +A long list of [regular expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions) is used to identify technologies on web pages. Wappalyzer inspects HTML code, as well as JavaScript variables, response headers and more. -const detections = Wappalyzer.analyze({ - url: 'https://example.github.io/', - meta: { generator: ['WordPress'] }, - headers: { server: ['Nginx'] }, - scripts: ['jquery-3.0.0.js'], - cookies: { awselb: [''] }, - html: '
' -}) +Patterns (regular expressions) are kept in [`src/technologies.json`](https://github.com/aliasio/wappalyzer/blob/master/src/technologies.json). The following is an example of an application fingerprint. -const results = Wappalyzer.resolve(detections) +#### Example -console.log(results) +```json +"Example": { + "description": "A short description of the technology.", + "cats": [ + "1" + ], + "cookies": { + "cookie_name": "Example" + }, + "dom": { + "#example-id": { + "attributes": { + "class": "example-class" + }, + "properties": { + "example-property": "" + }, + "content": "Example text content" + } + }, + "dns": { + "MX": [ + "example\\.com" + ] + }, + "js": { + "Example.method": "" + }, + "excludes": "Example", + "headers": { + "X-Powered-By": "Example" + }, + "html": "<link[^>]example\\.css", + "css": "\\.example-class", + "robots": "Disallow: /unique-path/", + "implies": "PHP\\;confidence:50", + "meta": { + "generator": "(?:Example|Another Example)" + }, + "scripts": "example-([0-9.]+)\\.js\\;confidence:50\\;version:\\1", + "url": ".+\\.example\\.com", + "website": "https://example.com" +} ``` + +## JSON fields + +Find the JSON schema at [`schema.json`](https://github.com/aliasio/wappalyzer/blob/master/schema.json). + +### Required properties + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescriptionExample
catsArray + One or more category IDs. + [1, 6]
websiteStringURL of the application's website. + "https://example.com" +
+ +### Optional properties + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescriptionExample
descriptionString + A short description of the technology in British English (max. + 250 characters). Write in a neutral, factual tone; not like an + ad. + "A short description."
iconStringApplication icon filename."WordPress.svg"
cpeString + The + CPE + is a structured naming scheme for applications, see the + specification. + "cpe:/a:apache:http_server"
+ +### Implies and excludes (optional) + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescriptionExample
impliesString | Array + The presence of one application can imply the presence of + another, e.g. WordPress means PHP is also in use. + "PHP"
excludesString | Array + Opposite of implies. The presence of one application can exclude + the presence of another. + "Apache"
+ +### Patterns (optional) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescriptionExample
cookiesObjectCookies.{ "cookie_name": "Cookie value" }
domObject + Uses a + query selector + to inspect element properties, attributes and text content. + + { "#example-id": { "properties": { "example-prop": "" } } + } +
dnsObject + DNS records: supports MX, TXT, SOA and NS (NPM driver only). + + { "MX": "example\\.com" } +
jsObject + JavaScript properties (case sensitive). Avoid short property + names to prevent matching minified code. + { "jQuery.fn.jquery": "" }
headersObjectHTTP response headers.{ "X-Powered-By": "^WordPress$" }
htmlString | Array + HTML source code. Patterns must include an HTML opening tag to + avoid matching plain text. For performance reasons, avoid + html where possible and use + dom instead. + "<a [^>]*href=\"index.html"
cssString | Array + CSS rules. Unavailable when a website enforces a same-origin + policy. For performance reasons, only a portion of the available + CSS rules are used to find matches. + "\\.example-class"
robotsString | Array + Robots.txt contents. + "Disallow: /unique-path/"
urlStringFull URL of the page."^https?://.+\\.wordpress\\.com"
metaObjectHTML meta tags, e.g. generator.{ "generator": "^WordPress$" }
scriptsString | Array + URLs of JavaScript files included on the page. + "jquery\\.js"
+ +## Patterns + +Patterns are essentially JavaScript regular expressions written as strings, but with some additions. + +### Quirks and pitfalls + +- Because of the string format, the escape character itself must be escaped when using special characters such as the dot (`\\.`). Double quotes must be escaped only once (`\"`). Slashes do not need to be escaped (`/`). +- Flags are not supported. Regular expressions are treated as case-insensitive. +- Capture groups (`()`) are used for version detection. In other cases, use non-capturing groups (`(?:)`). +- Use start and end of string anchors (`^` and `$`) where possible for optimal performance. +- Short or generic patterns can cause applications to be identified incorrectly. Try to find unique strings to match. + +### Tags + +Tags (a non-standard syntax) can be appended to patterns (and implies and excludes, separated by `\\;`) to store additional information. + + + + + + + + + + + + + + + + + + + + + + +
TagDescriptionExample
confidence + Indicates a less reliable pattern that may cause false + positives. The aim is to achieve a combined confidence of 100%. + Defaults to 100% if not specified. + + "js": { "Mage": "\\;confidence:50" } +
version + Gets the version number from a pattern match using a special + syntax. + + "scripts": "jquery-([0-9.]+)\\.js\\;version:\\1" +
+ +### Version syntax + +Application version information can be obtained from a pattern using a capture group. A condition can be evaluated using the ternary operator (`?:`). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExampleDescription
\\1Returns the first match.
\\1?a: + Returns a if the first match contains a value, nothing + otherwise. +
\\1?a:b + Returns a if the first match contains a value, b otherwise. +
\\1?:b + Returns nothing if the first match contains a value, b + otherwise. +
foo\\1 + Returns foo with the first match appended. +