Improve parsing of minified HTML

main
Elbert Alias 7 years ago
parent a2a18e12b5
commit ad3186060a

@ -75,7 +75,7 @@ class Driver {
chunkSize: 5, chunkSize: 5,
debug: false, debug: false,
delay: 500, delay: 500,
htmlMaxCols: 200, htmlMaxCols: 2000,
htmlMaxRows: 3000, htmlMaxRows: 3000,
maxDepth: 3, maxDepth: 3,
maxUrls: 10, maxUrls: 10,
@ -212,14 +212,11 @@ class Driver {
} }
const headers = getHeaders(browser); const headers = getHeaders(browser);
const html = this.getHtml(browser) const html = this.getHtml(browser);
;//.replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`);
const scripts = getScripts(browser); const scripts = getScripts(browser);
const js = this.getJs(browser); const js = this.getJs(browser);
const cookies = getCookies(browser); const cookies = getCookies(browser);
// console.log({ html, foo: html.split('\n').length });
this.wappalyzer.analyze(pageUrl, { this.wappalyzer.analyze(pageUrl, {
headers, headers,
html, html,
@ -286,6 +283,7 @@ class Driver {
try { try {
html = browser.html() html = browser.html()
.replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`)
.split('\n') .split('\n')
.slice(0, this.options.htmlMaxRows / 2) .slice(0, this.options.htmlMaxRows / 2)
.concat(html.slice(html.length - this.options.htmlMaxRows / 2)) .concat(html.slice(html.length - this.options.htmlMaxRows / 2))

@ -1,6 +1,6 @@
{ {
"name": "wappalyzer", "name": "wappalyzer",
"version": "5.5.4", "version": "5.5.5",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

@ -2,7 +2,7 @@
"name": "wappalyzer", "name": "wappalyzer",
"description": "Uncovers the technologies used on websites", "description": "Uncovers the technologies used on websites",
"homepage": "https://github.com/AliasIO/Wappalyzer", "homepage": "https://github.com/AliasIO/Wappalyzer",
"version": "5.5.5", "version": "5.5.3",
"author": "Elbert Alias", "author": "Elbert Alias",
"license": "GPL-3.0", "license": "GPL-3.0",
"repository": { "repository": {

@ -4,7 +4,7 @@
"author": "Elbert Alias", "author": "Elbert Alias",
"homepage_url": "https://www.wappalyzer.com", "homepage_url": "https://www.wappalyzer.com",
"description": "Identify web technologies", "description": "Identify web technologies",
"version": "5.5.5", "version": "5.5.3",
"default_locale": "en", "default_locale": "en",
"manifest_version": 2, "manifest_version": 2,
"icons": { "icons": {