Improve parsing of minified HTML

main
Elbert Alias 6 years ago
parent a2a18e12b5
commit ad3186060a

@@ -75,7 +75,7 @@ class Driver {
chunkSize: 5,
debug: false,
delay: 500,
htmlMaxCols: 200,
htmlMaxCols: 2000,
htmlMaxRows: 3000,
maxDepth: 3,
maxUrls: 10,
@@ -212,14 +212,11 @@ class Driver {
}
const headers = getHeaders(browser);
const html = this.getHtml(browser)
;//.replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`);
const html = this.getHtml(browser);
const scripts = getScripts(browser);
const js = this.getJs(browser);
const cookies = getCookies(browser);
// console.log({ html, foo: html.split('\n').length });
this.wappalyzer.analyze(pageUrl, {
headers,
html,
@@ -286,6 +283,7 @@ class Driver {
try {
html = browser.html()
.replace(new RegExp(`(.{${this.options.htmlMaxCols},}[^>]*>)<`, 'g'), (match, p1) => `${p1}\n<`)
.split('\n')
.slice(0, this.options.htmlMaxRows / 2)
.concat(html.slice(html.length - this.options.htmlMaxRows / 2))

@@ -1,6 +1,6 @@
{
"name": "wappalyzer",
"version": "5.5.4",
"version": "5.5.5",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

@@ -2,7 +2,7 @@
"name": "wappalyzer",
"description": "Uncovers the technologies used on websites",
"homepage": "https://github.com/AliasIO/Wappalyzer",
"version": "5.5.5",
"version": "5.5.3",
"author": "Elbert Alias",
"license": "GPL-3.0",
"repository": {

@@ -4,7 +4,7 @@
"author": "Elbert Alias",
"homepage_url": "https://www.wappalyzer.com",
"description": "Identify web technologies",
"version": "5.5.5",
"version": "5.5.3",
"default_locale": "en",
"manifest_version": 2,
"icons": {