diff --git a/src/drivers/npm/README.md b/src/drivers/npm/README.md index f84b76bec..d77b2270a 100644 --- a/src/drivers/npm/README.md +++ b/src/drivers/npm/README.md @@ -27,10 +27,11 @@ node index.js [url] [options] ### Options ``` + --chunk-size=num Process links in chunks. --debug=0|1 Output debug messages. --delay=ms Wait for ms milliseconds between requests. - --max-depth=num Don't analyze pages more than num levels deep. - --max-urls=num Exit when num URLs have been analyzed. + --max-depth=num Don't analyse pages more than num levels deep. + --max-urls=num Exit when num URLs have been analysed. --max-wait=ms Wait no more than ms milliseconds for page resources to load. --recursive=0|1 Follow links on pages (crawler). --user-agent=str Set the user agent string. diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 6b6df682b..399999089 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -13,6 +13,7 @@ const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/; class Driver { constructor(pageUrl, options) { this.options = Object.assign({}, { + chunkSize: 5, debug: false, delay: 500, maxDepth: 3, @@ -133,6 +134,7 @@ class Driver { }); const links = Array.from(browser.document.getElementsByTagName('a')) + .filter(link => link.protocol === 'http:' || link.protocol === 'https:') .filter(link => link.hostname === this.origPageUrl.hostname) .filter(link => extensions.test(link.pathname)) .map(link => { link.hash = ''; return url.parse(link.href) }); @@ -256,7 +258,7 @@ class Driver { return js; } - crawl(pageUrl, index = 1, depth = 1) { + crawl(pageUrl, index, depth = 1) { pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname; return new Promise(resolve => { @@ -264,7 +266,7 @@ class Driver { .catch(() => {}) .then(links => { if ( links && Boolean(this.options.recursive) && depth < this.options.maxDepth ) { - return Promise.all(links.map((link, index) => this.crawl(link, index + 1, depth + 1))); + return this.chunk(links.slice(0, this.options.maxUrls), depth + 1); } else { return Promise.resolve(); } @@ -279,6 +281,20 @@ class Driver { }); } + chunk(links, depth, chunk = 0) { + if ( links.length === 0 ) { + return Promise.resolve(); + } + + const chunked = links.splice(0, this.options.chunkSize); + + return new Promise(resolve => { + Promise.all(chunked.map((link, index) => this.crawl(link, index, depth))) + .then(() => this.chunk(links, depth, chunk + 1)) + .then(() => resolve()); + }); + } + sleep(ms) { return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve(); } diff --git a/src/drivers/npm/index.js b/src/drivers/npm/index.js index c8c254850..77ff1c160 100644 --- a/src/drivers/npm/index.js +++ b/src/drivers/npm/index.js @@ -28,6 +28,12 @@ while ( arg = args.shift() ) { const wappalyzer = new Wappalyzer(url, options); +setTimeout(() => { + console.log('force quit'); + + process.exit(1); +}, 10000); + wappalyzer.analyze() .then(json => { process.stdout.write(JSON.stringify(json) + '\n') diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json index f69102b7e..74397a22c 100644 --- a/src/drivers/npm/package.json +++ b/src/drivers/npm/package.json @@ -2,7 +2,7 @@ "name": "wappalyzer", "description": "Uncovers the technologies used on websites", "homepage": "https://github.com/AliasIO/Wappalyzer", - "version": "5.4.3", + "version": "5.4.4", "author": "Elbert Alias", "license": "GPL-3.0", "repository": { diff --git a/src/drivers/webextension/js/popup.js b/src/drivers/webextension/js/popup.js index 377934f97..a9ffc0aab 100644 --- a/src/drivers/webextension/js/popup.js +++ b/src/drivers/webextension/js/popup.js @@ -70,7 +70,7 @@ function appsToDomTemplate(response) { 'a', { class: 'detected__app', target: '_blank', - href: 'https://www.wappalyzer.com/applications/' + slugify(appName) + href: 'https://www.wappalyzer.com/technologies/' + slugify(appName) }, [ 'img', { class: 'detected__app-icon', diff --git a/src/drivers/webextension/manifest.json b/src/drivers/webextension/manifest.json index 28cc5d124..74292b879 100644 --- a/src/drivers/webextension/manifest.json +++ b/src/drivers/webextension/manifest.json @@ -4,7 +4,7 @@ "author": "Elbert Alias", "homepage_url": "https://www.wappalyzer.com", "description": "Identify web technologies", - "version": "5.4.3", + "version": "5.4.4", "default_locale": "en", "manifest_version": 2, "icons": {