You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

92 lines
4.2 KiB

#!/usr/bin/env node
// Validates the application definitions in ../src/apps.json.
//
// For every app, each detection pattern found under the properties listed in
// PATTERN_TYPES is checked for regular-expression validity, for agreement
// between its capture groups and the back references used by its "version"
// attribute, and for a few common authoring mistakes. The app's website URL is
// checked as well. The first hard failure aborts the script by throwing;
// suspicious (but not invalid) patterns only produce a warning.
const json = require('../src/apps.json');

// Application properties whose values hold detection patterns.
const PATTERN_TYPES = ['headers', 'html', 'env', 'meta', 'script'];

/**
 * Normalise the value of a pattern property to a flat list of patterns.
 *
 * @param {*} patterns - A single pattern string, an array of patterns, or an
 *   object whose values are patterns.
 * @returns {Array} The patterns as an array.
 */
function toPatternList(patterns) {
  if (typeof patterns === 'string') {
    return [patterns];
  }

  if (Array.isArray(patterns)) {
    return patterns;
  }

  // `null` is valid JSON but has no entries; Object.values() would throw on it.
  return patterns === null ? [] : Object.values(patterns);
}

/**
 * Count the capture groups in a regular expression source.
 *
 * @param {string} source - Regular expression source, without attributes.
 * @returns {number} The number of capture groups.
 * @throws {SyntaxError} If the source is not a valid regular expression.
 */
function countCaptureGroups(source) {
  // Making the whole expression optional guarantees a match against the empty
  // string; the match array then holds the full match plus one slot per group.
  return ''.match(new RegExp(`(?:${source})?`)).length - 1;
}

/**
 * List the distinct capture group indexes referenced by a version attribute.
 *
 * @param {string|undefined} version - The "version:..." attribute, if any.
 * @returns {number[]} Distinct back reference indexes, in order of appearance.
 */
function referencedCaptureGroups(version) {
  if (!version) {
    return [];
  }

  // Will only work if back references cannot be any longer than a single digit
  const backReferences = version.match(/\\\d/g) || [];

  return [...new Set(backReferences)].map(reference => parseInt(reference.charAt(1), 10));
}

/**
 * Validate a single detection pattern.
 *
 * @param {string} app - Name of the application the pattern belongs to.
 * @param {string} type - Property the pattern was found under (e.g. "html").
 * @param {string} pattern - The pattern: a regular expression source,
 *   optionally followed by attributes separated by a backslash and a semicolon.
 * @throws {TypeError} If the pattern is not a string.
 * @throws {SyntaxError} If the pattern is not a valid regular expression.
 * @throws {Error} If the pattern breaks one of the authoring rules.
 */
function validatePattern(app, type, pattern) {
  const location = `${app}: ${type}: ${pattern}`;

  if (typeof pattern !== 'string') {
    throw new TypeError(`Pattern must be a string.\n${location}`);
  }

  const attrs = pattern.split('\\;');
  const source = attrs.shift();
  // Slash-delimited form of the expression; the checks further down rely on the delimiters.
  const regex = `/${source}/`;
  const version = attrs.find(attr => attr.startsWith('version:'));

  // Check if the pattern is a valid RegExp
  // Note: unlike when used in Wappalyzer, the modifier i isn't added here
  try {
    new RegExp(source);
  } catch (error) {
    // Re-throw with the same context every other validation error carries.
    throw new SyntaxError(`${error.message}\n${location}`);
  }

  // Actual amount of capture groups (attributes are not part of the expression)
  const amountOfCaptureGroups = countCaptureGroups(source);

  // Capture groups of which the result is used
  const captureGroupsNeeded = referencedCaptureGroups(version);

  // Amount of capture groups needed
  const amountOfCaptureGroupsNeeded = captureGroupsNeeded.length;

  // Highest back reference number used (-Infinity when none are used)
  const highestBackReference = Math.max(...captureGroupsNeeded);

  // Report error
  if (amountOfCaptureGroups > amountOfCaptureGroupsNeeded) {
    throw new Error(`The pattern uses more capture groups than needed. Use non-capturing groups where appropriate.\n${location}`);
  } else if (amountOfCaptureGroups < amountOfCaptureGroupsNeeded) {
    throw new Error(`The version string references more capture groups than there are in the pattern! Remove any incorrect back references from the version string and/or add the missing capture groups to the pattern.\n${location}`);
  } else if (highestBackReference > amountOfCaptureGroups) {
    throw new Error(`The version string references one or more capture groups whose index is higher than the amount of capture groups in the pattern. Please use the correct index instead of \\\\${highestBackReference}.\n${location}`);
  }

  // Expressions that consist solely of "^$", ".+" or ".*" carry no information.
  if (/^\/(?:\^\$|\.\+|\.\*)\/$/.test(regex)) {
    throw new Error(`Pattern should be replaced with empty string.\n${location}`);
  }

  if (type === 'html') {
    if (/\.(?:\+|\*)/.test(regex)) {
      throw new Error(`Avoid ".+" and ".*" in HTML patterns. Consider using "[^>]+" or "[^<]+" instead.\n${location}`);
    }

    if (!/[<>]/.test(regex)) {
      throw new Error(`HTML patterns must contain "<" or ">".\n${location}`);
    }
  }

  // Warn about suspicious periods (".") in patterns which should probably have been escaped.
  // Periods inside character classes (such as [\d.]) don't count as wildcards, so character
  // classes are replaced with "_" before testing. Removing them entirely would leave stray
  // "*" and "+" characters behind: /test.[a-z]+/ would become /test.+/ and the unescaped "."
  // would go unnoticed, whereas /test._+/ still triggers the warning.
  if (/(?:^\/|[^\\])\.(?:[^*+]|\/$)/.test(regex.replace(/([^\\]|^)\[[^\]]+\]/g, '$1_'))) {
    console.warn(`Suspicious period (".") in pattern. Should this have been escaped?\n\tApp: ${app}\n\tPattern: ${type}: ${pattern}`);
  }
}

for (const app in json.apps) {
  const definition = json.apps[app];

  PATTERN_TYPES.forEach((type) => {
    const patterns = definition[type];

    if (patterns !== undefined) {
      toPatternList(patterns).forEach(pattern => validatePattern(app, type, pattern));
    }
  });

  // Every app must link to an absolute http(s) website.
  if (!/^https?:\/\//i.test(definition.website)) {
    throw new Error(`Invalid website URL\n${app}: ${definition.website}`);
  }
}