From ad3186060aaad0e38732ca4b6fd8c0706cd69443 Mon Sep 17 00:00:00 2001
From: Elbert Alias <77259+AliasIO@users.noreply.github.com>
Date: Sat, 27 Oct 2018 09:25:42 +1100
Subject: [PATCH] Improve parsing of minified HTML

---
 src/drivers/npm/driver.js              | 8 +++-----
 src/drivers/npm/npm-shrinkwrap.json    | 2 +-
 src/drivers/npm/package.json           | 2 +-
 src/drivers/webextension/manifest.json | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js
index 0bf265de3..aa9e6a78b 100644
--- a/src/drivers/npm/driver.js
+++ b/src/drivers/npm/driver.js
@@ -75,7 +75,7 @@ class Driver {
       chunkSize: 5,
       debug: false,
       delay: 500,
-      htmlMaxCols: 200,
+      htmlMaxCols: 2000,
       htmlMaxRows: 3000,
       maxDepth: 3,
       maxUrls: 10,
@@ -212,14 +212,11 @@ class Driver {
       }
 
       const headers = getHeaders(browser);
-      const html = this.getHtml(browser)
-      ;//.replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`);
+      const html = this.getHtml(browser);
       const scripts = getScripts(browser);
       const js = this.getJs(browser);
       const cookies = getCookies(browser);
 
-      // console.log({ html, foo: html.split('\n').length });
-
       this.wappalyzer.analyze(pageUrl, {
         headers,
         html,
@@ -286,6 +283,7 @@ class Driver {
 
     try {
       html = browser.html()
+        .replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`)
         .split('\n')
         .slice(0, this.options.htmlMaxRows / 2)
         .concat(html.slice(html.length - this.options.htmlMaxRows / 2))
diff --git a/src/drivers/npm/npm-shrinkwrap.json b/src/drivers/npm/npm-shrinkwrap.json
index f76d89198..ae0b7f7cc 100644
--- a/src/drivers/npm/npm-shrinkwrap.json
+++ b/src/drivers/npm/npm-shrinkwrap.json
@@ -1,6 +1,6 @@
 {
   "name": "wappalyzer",
-  "version": "5.5.4",
+  "version": "5.5.5",
   "lockfileVersion": 1,
   "requires": true,
   "dependencies": {
diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json
index 0d4e60cec..88df89fd2 100644
--- a/src/drivers/npm/package.json
+++ b/src/drivers/npm/package.json
@@ -2,7 +2,7 @@
   "name": "wappalyzer",
   "description": "Uncovers the technologies used on websites",
   "homepage": "https://github.com/AliasIO/Wappalyzer",
-  "version": "5.5.5",
+  "version": "5.5.3",
   "author": "Elbert Alias",
   "license": "GPL-3.0",
   "repository": {
diff --git a/src/drivers/webextension/manifest.json b/src/drivers/webextension/manifest.json
index 41b1b0886..a17e6b097 100644
--- a/src/drivers/webextension/manifest.json
+++ b/src/drivers/webextension/manifest.json
@@ -4,7 +4,7 @@
 	"author": "Elbert Alias",
 	"homepage_url": "https://www.wappalyzer.com",
 	"description": "Identify web technologies",
-	"version": "5.5.5",
+	"version": "5.5.3",
 	"default_locale": "en",
 	"manifest_version": 2,
 	"icons": {