This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.
wappalyzer/bin/validate-regex

98 lines
4.1 KiB

#!/usr/bin/env node
var
app,
json = require('../src/apps.json');
for ( app in json.apps ) {
['headers', 'html', 'env', 'meta', 'script'].forEach(function(type) {
var
key,
patterns = json.apps[app][type];
if ( patterns !== undefined ) {
patterns = typeof patterns === 'string' ? [patterns] : patterns;
if ( !( patterns instanceof Array ) ) {
patterns = [];
for ( key in json.apps[app][type] ) {
patterns.push(json.apps[app][type][key]);
}
}
patterns.forEach(function(pattern) {
var
attrs = pattern.split('\\;'),
regex = '/' + attrs.shift().replace('/', '\/') + '/',
version = attrs.find( function (attr){
return attr.indexOf('version:')===0;
} ),
amountOfCaptureGroups,
captureGroupsNeeded,
amountOfCaptureGroupsNeeded,
highestBackReference;
//Check if the pattern is a valid RegExp
//Note: unlike when used in Wappalyzer, the modifier i isn't added here
new RegExp(regex);
//Actual amount of capture groups
amountOfCaptureGroups = ''.match(new RegExp('(?:'+pattern+')?')).length - 1;
//Capture groups of which the result is used
captureGroupsNeeded = version ? (version.match(/\\\d/g) || []).filter( function(value, index, self){
return self.indexOf(value) === index;
}).map( function(value){//Because Math.max needs the integers only
return parseInt(value.charAt(1), 10);//Will only work if backreferences cannot be any longer than a single digit
}) : [];
//Amount of capture groups needed
amountOfCaptureGroupsNeeded = captureGroupsNeeded.length;
//Highest back reference number used
highestBackReference = Math.max.apply(null, captureGroupsNeeded);
//Report error
if(amountOfCaptureGroups > amountOfCaptureGroupsNeeded) {
throw new Error('The pattern uses more capture groups than needed. Use non-capturing groups where appropriate.\n' + app + ': ' + type + ': ' + pattern);
}else if(amountOfCaptureGroups < amountOfCaptureGroupsNeeded){
throw new Error('The version string references more capture groups than there are in the pattern! Remove any incorrect back references from the version string and/or add the missing capture groups to the pattern.\n' + app + ': ' + type + ': ' + pattern);
} else if(highestBackReference > amountOfCaptureGroups){
throw new Error('The version string references one or more capture groups whose index is higher than the amount of capture groups in the pattern. Please use the correct index instead of \\\\'+highestBackReference+'.\n' + app + ': ' + type + ': ' + pattern);
}
if ( /^\/(?:\^\$|\.\+|\.\*)\/$/.test(regex) ) {
throw new Error('Pattern should be replaced with empty string.\n' + app + ': ' + type + ': ' + pattern);
}
if ( type === 'html' ) {
if ( /\.(?:\+|\*)/.test(regex) ) {
throw new Error('Avoid ".+" and ".*" in HTML patterns. Consider using "[^>]+" or "[^<]+" instead.\n' + app + ': ' + type + ': ' + pattern);
}
if ( !/[<>]/.test(regex) ) {
throw new Error('HTML patterns must contain "<" or ">".\n' + app + ': ' + type + ': ' + pattern);
}
}
//Warn about suspicious periods (".") in patterns which should probably have been escaped
// Periods inside character classes (such as [\d.]) don't count as wildcard, so we'll replace
// the character classes in the pattern with "_". (We could remove them entirely, but then
// we'd have to deal with leftover * and + characters; for example, removing the character
// class entirely from the pattern /test.[a-z]+/ would yield the pattern /test.+/, which
// would mean not detecting the un-escaped ".". Replacing the character class with an
// underscore instead gives /test._+/, which WOULD yield a warning about the unescaped ".".)
if ( /(?:^\/|[^\\])\.(?:[^*+]|\/$)/.test(regex.replace(/([^\\]|^)\[[^\]]+\]/g,'$1_') ) ) {
console.warn('Suspicious period (".") in pattern. Should this have been escaped?\n\tApp: ' + app + '\n\tPattern: ' + type + ': ' + pattern);
}
});
}
});
8 years ago
if ( !/^https?:\/\//i.test(json.apps[app].website) ) {
throw new Error('Invalid website URL\n' + app + ': ' + json.apps[app].website);
}
}