#!/usr/bin/env node

/**
 * Validates the application definitions in ../src/apps.json.
 *
 * For every app, each pattern found under the `headers`, `html`, `env`,
 * `meta` and `script` keys is checked for:
 *   - being a valid regular expression,
 *   - having exactly as many capture groups as its `version:` attribute uses,
 *   - not being a catch-all expression (`^$`, `.+` or `.*`),
 *   - (HTML only) avoiding `.+` / `.*` and containing `<` or `>`.
 * Unescaped periods that look accidental only produce a warning.
 * Finally the app's `website` must start with http:// or https://.
 *
 * The script throws on the first violation it finds.
 */

const json = require('../src/apps.json');

// Keys of an app definition that hold patterns, in the order they are checked.
const PATTERN_TYPES = ['headers', 'html', 'env', 'meta', 'script'];

/**
 * Normalises the value of a pattern key to a flat list of patterns.
 *
 * @param {*} value - A single pattern string, an array of patterns, or an
 *   object whose values are patterns (the keys are not validated).
 * @returns {Array} The patterns to validate; empty for any other value.
 */
function toPatternList(value) {
  if (typeof value === 'string') {
    return [value];
  }

  if (Array.isArray(value)) {
    return value;
  }

  if (value === null || typeof value !== 'object') {
    return [];
  }

  return Object.values(value);
}

/**
 * Validates a single pattern string of the form `<regex>[\;<attr>]*`.
 *
 * @param {string} app - Name of the app the pattern belongs to (for messages).
 * @param {string} type - Key the pattern was found under (for messages).
 * @param {string} pattern - The raw pattern, including any `\;` attributes.
 * @throws {Error} If the pattern violates one of the rules described above.
 */
function validatePattern(app, type, pattern) {
  const location = `${app}: ${type}: ${pattern}`;

  // The expression comes first; everything after a "\;" is an attribute.
  const [source, ...attrs] = pattern.split('\\;');
  const version = attrs.find((attr) => attr.startsWith('version:'));

  // Regex-literal-like form of the expression, used by the textual checks below.
  const regex = `/${source}/`;

  // Check if the expression is a valid RegExp.
  // The expression itself is compiled rather than the slash-wrapped form:
  // the wrapping slashes would be literal characters and can hide errors
  // (e.g. "/*a/" compiles while "*a" does not).
  // Note: unlike when used in Wappalyzer, the modifier i isn't added here
  try {
    new RegExp(source);
  } catch (error) {
    throw new Error(`${error.message}\n${location}`);
  }

  // Actual amount of capture groups. The optional wrapper always matches the
  // empty string, so the match array holds one slot per capture group.
  // Only the expression is counted; attribute text is not part of the regex.
  const amountOfCaptureGroups = ''.match(new RegExp(`(?:${source})?`)).length - 1;

  // Capture groups of which the result is used (distinct back references).
  // Will only work if backreferences cannot be any longer than a single digit
  const captureGroupsNeeded = version
    ? [...new Set(version.match(/\\\d/g) || [])].map((reference) => parseInt(reference.charAt(1), 10))
    : [];

  // Amount of capture groups needed
  const amountOfCaptureGroupsNeeded = captureGroupsNeeded.length;

  // Highest back reference number used (-Infinity when there are none)
  const highestBackReference = Math.max(...captureGroupsNeeded);

  // Report error
  if (amountOfCaptureGroups > amountOfCaptureGroupsNeeded) {
    throw new Error(`The pattern uses more capture groups than needed. Use non-capturing groups where appropriate.\n${location}`);
  } else if (amountOfCaptureGroups < amountOfCaptureGroupsNeeded) {
    throw new Error(`The version string references more capture groups than there are in the pattern! \nRemove any incorrect back references from the version string and/or add the missing capture groups to the pattern.\n${location}`);
  } else if (highestBackReference > amountOfCaptureGroups) {
    throw new Error(`The version string references one or more capture groups whose index is higher than the amount of capture groups in the pattern. Please use the correct index instead of \\\\${highestBackReference}.\n${location}`);
  }

  if (/^\/(?:\^\$|\.\+|\.\*)\/$/.test(regex)) {
    throw new Error(`Pattern should be replaced with empty string.\n${location}`);
  }

  if (type === 'html') {
    if (/\.(?:\+|\*)/.test(regex)) {
      throw new Error(`Avoid ".+" and ".*" in HTML patterns. Consider using "[^>]+" or "[^<]+" instead.\n${location}`);
    }

    if (!/[<>]/.test(regex)) {
      throw new Error(`HTML patterns must contain "<" or ">".\n${location}`);
    }
  }

  // Warn about suspicious periods (".") in patterns which should probably have been escaped.
  // Periods inside character classes (such as [\d.]) don't count as wildcards, so character
  // classes are replaced with "_" before testing. They are replaced rather than removed
  // because removal would leave their quantifier behind: /test.[a-z]+/ would become
  // /test.+/ and the un-escaped "." would go unnoticed, whereas /test._+/ still triggers
  // the warning.
  const withoutCharacterClasses = regex.replace(/([^\\]|^)\[[^\]]+\]/g, '$1_');

  if (/(?:^\/|[^\\])\.(?:[^*+]|\/$)/.test(withoutCharacterClasses)) {
    console.warn(`Suspicious period (".") in pattern. Should this have been escaped?\n\tApp: ${app}\n\tPattern: ${type}: ${pattern}`);
  }
}

for (const app in json.apps) {
  const definition = json.apps[app];

  PATTERN_TYPES.forEach((type) => {
    const value = definition[type];

    if (value === undefined) {
      return;
    }

    toPatternList(value).forEach((pattern) => validatePattern(app, type, pattern));
  });

  if (!/^https?:\/\//i.test(definition.website)) {
    throw new Error(`Invalid website URL\n${app}: ${definition.website}`);
  }
}