From ad3186060aaad0e38732ca4b6fd8c0706cd69443 Mon Sep 17 00:00:00 2001 From: Elbert Alias <77259+AliasIO@users.noreply.github.com> Date: Sat, 27 Oct 2018 09:25:42 +1100 Subject: [PATCH] Improve parsing of minified HTML --- src/drivers/npm/driver.js | 8 +++----- src/drivers/npm/npm-shrinkwrap.json | 2 +- src/drivers/npm/package.json | 2 +- src/drivers/webextension/manifest.json | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 0bf265de3..aa9e6a78b 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -75,7 +75,7 @@ class Driver { chunkSize: 5, debug: false, delay: 500, - htmlMaxCols: 200, + htmlMaxCols: 2000, htmlMaxRows: 3000, maxDepth: 3, maxUrls: 10, @@ -212,14 +212,11 @@ class Driver { } const headers = getHeaders(browser); - const html = this.getHtml(browser) - ;//.replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`); + const html = this.getHtml(browser); const scripts = getScripts(browser); const js = this.getJs(browser); const cookies = getCookies(browser); - // console.log({ html, foo: html.split('\n').length }); - this.wappalyzer.analyze(pageUrl, { headers, html, @@ -286,6 +283,7 @@ class Driver { try { html = browser.html() + .replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`) .split('\n') .slice(0, this.options.htmlMaxRows / 2) .concat(html.slice(html.length - this.options.htmlMaxRows / 2)) diff --git a/src/drivers/npm/npm-shrinkwrap.json b/src/drivers/npm/npm-shrinkwrap.json index f76d89198..ae0b7f7cc 100644 --- a/src/drivers/npm/npm-shrinkwrap.json +++ b/src/drivers/npm/npm-shrinkwrap.json @@ -1,6 +1,6 @@ { "name": "wappalyzer", - "version": "5.5.4", + "version": "5.5.5", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json index 0d4e60cec..88df89fd2 100644 --- a/src/drivers/npm/package.json +++ b/src/drivers/npm/package.json @@ -2,7 +2,7 @@ "name": "wappalyzer", "description": "Uncovers the technologies used on websites", "homepage": "https://github.com/AliasIO/Wappalyzer", - "version": "5.5.5", + "version": "5.5.3", "author": "Elbert Alias", "license": "GPL-3.0", "repository": { diff --git a/src/drivers/webextension/manifest.json b/src/drivers/webextension/manifest.json index 41b1b0886..a17e6b097 100644 --- a/src/drivers/webextension/manifest.json +++ b/src/drivers/webextension/manifest.json @@ -4,7 +4,7 @@ "author": "Elbert Alias", "homepage_url": "https://www.wappalyzer.com", "description": "Identify web technologies", - "version": "5.5.5", + "version": "5.5.3", "default_locale": "en", "manifest_version": 2, "icons": {