Improve language detection

main
Elbert Alias 5 years ago
parent 304b5976a5
commit 2e2f7246f4

@ -720,7 +720,7 @@
19
],
"headers": {
"Server": "^AmazonS3$"
"server": "^AmazonS3$"
},
"icon": "aws-s3.svg",
"implies": "Amazon Web Services",
@ -13281,7 +13281,7 @@
"cats": [
13
],
"html": "<!--\\s+OTRS: Copyright \\d+-\\d+, OTRS AG",
"html": "<!--\\s+OTRS: Copyright",
"icon": "otrs.png",
"implies": "Perl",
"script": "^/otrs-web/js/",

@ -1,6 +1,7 @@
const url = require('url');
const fs = require('fs');
const path = require('path');
const cld = require('cld');
const Wappalyzer = require('./wappalyzer');
const json = JSON.parse(fs.readFileSync(path.resolve(`${__dirname}/apps.json`)));
@ -229,12 +230,31 @@ class Driver {
const html = processHtml(browser.html, this.options.htmlMaxCols, this.options.htmlMaxRows);
const js = processJs(browser.js, this.wappalyzer.jsPatterns);
let language = null;
try {
language = await new Promise((resolve, reject) => cld.detect(html, { isHTML: true }, (error, { languages }) => {
if (error) {
reject(error);
}
resolve(
languages
.filter(({ percent }) => percent >= 75)
.map(({ code }) => code)[0],
);
}));
} catch (error) {
this.wappalyzer.log(`${error.message || error}; url: ${pageUrl.href}`, 'driver', 'error');
}
await this.wappalyzer.analyze(pageUrl, {
cookies,
headers,
html,
js,
scripts,
language,
});
const reducedLinks = Array.prototype.reduce.call(

@ -4,11 +4,6 @@
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"@types/mime-types": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@types/mime-types/-/mime-types-2.1.0.tgz",
"integrity": "sha1-nKUs2jY/aZxpRmwqbM2q2RPqenM="
},
"abab": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/abab/-/abab-2.0.3.tgz",
@ -40,11 +35,6 @@
"resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.2.0.tgz",
"integrity": "sha512-7evsyfH1cLOCdAzZAd43Cic04yKydNx0cF+7tiA19p1XnLLPU4dpCQOqpjqwokFe//vS0QqfqqjCS2JkiIs0cA=="
},
"agent-base": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz",
"integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g=="
},
"ajv": {
"version": "6.12.0",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.0.tgz",
@ -135,21 +125,36 @@
"resolved": "https://registry.npmjs.org/browser-process-hrtime/-/browser-process-hrtime-0.1.3.tgz",
"integrity": "sha512-bRFnI4NnjO6cnyLmOV/7PVoDEMJChlcfN0z4s1YMBY989/SvlfMI1lgCnkFUs53e9gQF+w7qu7XdllSTiSl8Aw=="
},
"buffer-crc32": {
"version": "0.2.13",
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
"integrity": "sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI="
},
"buffer-from": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz",
"integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A=="
},
"caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
"integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw="
},
"cld": {
"version": "2.6.0",
"resolved": "https://registry.npmjs.org/cld/-/cld-2.6.0.tgz",
"integrity": "sha512-2U8Uiv7Bvl1v4fNWFGB3RYtPvhUWXQJ1MoNKJNVuoALfandEt9oVqK64S+3ZLvQPjDiYjsohtTep/wIs0xOXkw==",
"requires": {
"glob": "^5.0.10",
"node-addon-api": "*",
"rimraf": "^2.4.0",
"underscore": "^1.6.0"
},
"dependencies": {
"glob": {
"version": "5.0.15",
"resolved": "https://registry.npmjs.org/glob/-/glob-5.0.15.tgz",
"integrity": "sha1-G8k2ueAvSmA/zCIuz3Yz0wuLk7E=",
"requires": {
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "2 || 3",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
}
}
}
},
"combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
@ -163,17 +168,6 @@
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s="
},
"concat-stream": {
"version": "1.6.2",
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
"integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==",
"requires": {
"buffer-from": "^1.0.0",
"inherits": "^2.0.3",
"readable-stream": "^2.2.2",
"typedarray": "^0.0.6"
}
},
"core-js": {
"version": "2.6.11",
"resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.11.tgz",
@ -302,32 +296,6 @@
"resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
"integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="
},
"extract-zip": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-1.7.0.tgz",
"integrity": "sha512-xoh5G1W/PB0/27lXgMQyIhP5DSY/LhoCsOyZgb+6iMmRtCwVBo55uKaMoEYrDCKQhWvqEip5ZPKAc6eFNyf/MA==",
"requires": {
"concat-stream": "^1.6.2",
"debug": "^2.6.9",
"mkdirp": "^0.5.4",
"yauzl": "^2.10.0"
},
"dependencies": {
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
}
}
},
"extsprintf": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
@ -348,14 +316,6 @@
"resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz",
"integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc="
},
"fd-slicer": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz",
"integrity": "sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4=",
"requires": {
"pend": "~1.2.0"
}
},
"forever-agent": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
@ -429,15 +389,6 @@
"sshpk": "^1.7.0"
}
},
"https-proxy-agent": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz",
"integrity": "sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==",
"requires": {
"agent-base": "5",
"debug": "4"
}
},
"iconv-lite": {
"version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
@ -465,11 +416,6 @@
"resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
"integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo="
},
"isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
},
"isstream": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
@ -599,24 +545,16 @@
"brace-expansion": "^1.1.7"
}
},
"minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw=="
},
"mkdirp": {
"version": "0.5.5",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz",
"integrity": "sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==",
"requires": {
"minimist": "^1.2.5"
}
},
"ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node-addon-api": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-2.0.0.tgz",
"integrity": "sha512-ASCL5U13as7HhOExbT6OlWJJUV/lLzL2voOSP1UVehpRD8FbSrSDjfScK/KwAvVTI5AS6r4VwbOMlIqtvRidnA=="
},
"nwsapi": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.0.tgz",
@ -666,11 +604,6 @@
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18="
},
"pend": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
"integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA="
},
"performance-now": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
@ -686,21 +619,6 @@
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz",
"integrity": "sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ="
},
"process-nextick-args": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag=="
},
"progress": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
"integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA=="
},
"proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
},
"psl": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.7.0.tgz",
@ -711,23 +629,6 @@
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz",
"integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A=="
},
"puppeteer": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-2.1.1.tgz",
"integrity": "sha512-LWzaDVQkk1EPiuYeTOj+CZRIjda4k2s5w4MK4xoH2+kgWV/SDlkYHmxatDdtYrciHUKSXTsGgPgPP8ILVdBsxg==",
"requires": {
"@types/mime-types": "^2.1.0",
"debug": "^4.1.0",
"extract-zip": "^1.6.6",
"https-proxy-agent": "^4.0.0",
"mime": "^2.0.3",
"mime-types": "^2.1.25",
"progress": "^2.0.1",
"proxy-from-env": "^1.0.0",
"rimraf": "^2.6.1",
"ws": "^6.1.0"
}
},
"qs": {
"version": "6.5.2",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
@ -738,27 +639,6 @@
"resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.1.1.tgz",
"integrity": "sha512-w7fLxIRCRT7U8Qu53jQnJyPkYZIaR4n5151KMfcJlO/A9397Wxb1amJvROTK6TOnp7PfoAmg/qXiNHI+08jRfA=="
},
"readable-stream": {
"version": "2.3.7",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz",
"integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==",
"requires": {
"core-util-is": "~1.0.0",
"inherits": "~2.0.3",
"isarray": "~1.0.0",
"process-nextick-args": "~2.0.0",
"safe-buffer": "~5.1.1",
"string_decoder": "~1.1.1",
"util-deprecate": "~1.0.1"
},
"dependencies": {
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
}
}
},
"regenerator-runtime": {
"version": "0.11.1",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.11.1.tgz",
@ -864,21 +744,6 @@
"resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz",
"integrity": "sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks="
},
"string_decoder": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"requires": {
"safe-buffer": "~5.1.0"
},
"dependencies": {
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
}
}
},
"symbol-tree": {
"version": "3.2.4",
"resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz",
@ -922,10 +787,10 @@
"prelude-ls": "~1.1.2"
}
},
"typedarray": {
"version": "0.0.6",
"resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
"integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
"underscore": {
"version": "1.10.2",
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.10.2.tgz",
"integrity": "sha512-N4P+Q/BuyuEKFJ43B9gYuOj4TQUHXX+j2FqguVOpjkssLUUrnJofCcBccJSCoeturDoZU6GorDTHSvUDlSQbTg=="
},
"uri-js": {
"version": "4.2.2",
@ -944,11 +809,6 @@
"requires-port": "^1.0.0"
}
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
},
"uuid": {
"version": "3.4.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz",
@ -1023,15 +883,6 @@
"resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz",
"integrity": "sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw=="
},
"yauzl": {
"version": "2.10.0",
"resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",
"integrity": "sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk=",
"requires": {
"buffer-crc32": "~0.2.3",
"fd-slicer": "~1.1.0"
}
},
"zombie": {
"version": "6.1.4",
"resolved": "https://registry.npmjs.org/zombie/-/zombie-6.1.4.tgz",

@ -27,6 +27,7 @@
"wappalyzer": "./cli.js"
},
"dependencies": {
"cld": "^2.6.0",
"zombie": "^6.1.4"
},
"peerDependencies": {

@ -3,7 +3,7 @@
*/
/* eslint-env browser */
/* global browser, chrome, fetch, Wappalyzer */
/* global browser, chrome, Wappalyzer */
/** global: browser */
/** global: chrome */
@ -24,10 +24,6 @@ browser.tabs.onRemoved.addListener((tabId) => {
function userAgent() {
const url = chrome.extension.getURL('/');
if (url.match(/^chrome-/)) {
return 'chrome';
}
if (url.match(/^moz-/)) {
return 'firefox';
}
@ -35,6 +31,8 @@ function userAgent() {
if (url.match(/^ms-browser-/)) {
return 'edge';
}
return 'chrome';
}
/**
@ -162,7 +160,20 @@ browser.runtime.onConnect.addListener((port) => {
break;
case 'analyze':
wappalyzer.analyze(url, message.subject, { tab: port.sender.tab });
if (message.subject.html) {
browser.i18n.detectLanguage(message.subject.html)
.then(({ languages }) => {
const language = languages
.filter(({ percentage }) => percentage >= 75)
.map(({ language: lang }) => lang)[0];
message.subject.language = language;
wappalyzer.analyze(url, message.subject, { tab: port.sender.tab });
});
} else {
wappalyzer.analyze(url, message.subject, { tab: port.sender.tab });
}
await setOption('hostnameCache', wappalyzer.hostnameCache);
@ -219,7 +230,7 @@ wappalyzer.driver.document = document;
wappalyzer.driver.log = (message, source, type) => {
const log = ['warn', 'error'].indexOf(type) !== -1 ? type : 'log';
console.log(`[wappalyzer ${type}]`, `[${source}]`, message); // eslint-disable-line no-console
console[log](`[wappalyzer ${type}]`, `[${source}]`, message); // eslint-disable-line no-console
};
/**

@ -1,4 +1,5 @@
/* eslint-env browser */
/* eslint-disable no-restricted-globals, no-prototype-builtins */
(() => {
try {
@ -7,7 +8,7 @@
let value = properties.length ? window : null;
for (let i = 0; i < properties.length; i++) {
for (let i = 0; i < properties.length; i += 1) {
const property = properties[i];
if (value && value.hasOwnProperty(property)) {

@ -172,7 +172,7 @@ class Wappalyzer {
let matches = data.html.match(new RegExp('<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"', 'i'));
language = matches && matches.length ? matches[1] : null;
language = matches && matches.length ? matches[1] : data.language || null;
// Meta tags
const regex = /<meta[^>]+>/ig;