diff --git a/schema.json b/schema.json index b88eb63f6..9160c3e83 100644 --- a/schema.json +++ b/schema.json @@ -97,6 +97,19 @@ } ] }, + "robots": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/definitions/non-empty-non-blank-string" + } + }, + { + "$ref": "#/definitions/non-empty-non-blank-string" + } + ] + }, "excludes": { "oneOf": [ { diff --git a/src/apps.json b/src/apps.json index 9688b2f75..3d681be5c 100644 --- a/src/apps.json +++ b/src/apps.json @@ -317,6 +317,28 @@ "script": "[^a-z]adnegah.*\\.js$", "website": "https://Adnegah.net" }, + "Adobe Analytics": { + "cats": [ + 10, + 61 + ], + "js": { + "s_c_il.0._c": "s_c", + "s_c_il.1._c": "s_c", + "s_c_il.2._c": "s_c", + "s_c_il.3._c": "s_c", + "s_c_il.4._c": "s_c", + "s_c_il.5._c": "s_c", + "s_c_il.0.constructor.name": "AppMeasurement", + "s_c_il.1.constructor.name": "AppMeasurement", + "s_c_il.2.constructor.name": "AppMeasurement", + "s_c_il.3.constructor.name": "AppMeasurement", + "s_c_il.4.constructor.name": "AppMeasurement", + "s_c_il.5.constructor.name": "AppMeasurement" + }, + "icon": "Adobe Analytics.svg", + "website": "https://www.adobe.com/analytics/adobe-analytics.html" + }, "Adobe ColdFusion": { "cats": [ 18 @@ -362,8 +384,24 @@ "/etc/clientlibs/", "/etc\\.clientlibs/" ], + "icon": "Adobe.svg", "website": "https://www.adobe.com/marketing/experience-manager.html" }, + "Adobe Experience Platform Identity Service": { + "cats": [ + 61 + ], + "js": { + "s_c_il.0._c": "Visitor", + "s_c_il.1._c": "Visitor", + "s_c_il.2._c": "Visitor", + "s_c_il.3._c": "Visitor", + "s_c_il.4._c": "Visitor", + "s_c_il.5._c": "Visitor" + }, + "icon": "Adobe.svg", + "website": "https://docs.adobe.com/content/help/en/id-service/using/home.html" + }, "Adobe GoLive": { "cats": [ 20 @@ -394,6 +432,18 @@ "script": "(?:wh(?:utils|ver|proxy|lang|topic|msg)|ehlpdhtm)\\.js", "website": "http://adobe.com/products/robohelp.html" }, + "Adobe Target": { + "cats": [ + 32, + 61 + ], + "js": { + "adobe.target": "", + 
"adobe.target.VERSION": "^(.+)$\\;version:\\1" + }, + "icon": "Adobe.svg", + "website": "https://www.adobe.com/marketing/target.html" + }, "AdonisJS": { "cats": [ 18 @@ -1025,7 +1075,10 @@ 10 ], "icon": "AppDynamics.png", - "script": "adrum\\.js|adrum\\.([0-9].*)\\.js\\;version:\\1", + "js": { + "ADRUM.conf.agentVer": "^(.+)$\\;version:\\1" + }, + "script": "adrum", "website": "https://appdynamics.com" }, "AppNexus": { @@ -4787,7 +4840,6 @@ }, "Google Analytics Enhanced eCommerce": { "cats": [ - 6, 10 ], "icon": "Google Analytics.svg", @@ -6017,6 +6069,16 @@ "implies": "Java", "website": "http://www.eclipse.org/jetty" }, + "Jibres": { + "cats": [ + 6 + ], + "headers": { + "X-Powered-By": "Jibres" + }, + "icon": "Jibres.svg", + "website": "https://jibres.com" + }, "Jimdo": { "cats": [ 1 @@ -8818,7 +8880,8 @@ ], "headers": { "Server": "^Pantheon", - "x-pantheon-styx-hostname": "" + "x-pantheon-styx-hostname": "", + "x-styx-req-id": "" }, "icon": "pantheon.svg", "implies": [ @@ -10838,20 +10901,6 @@ "script": "sitemeter\\.com/js/counter\\.js\\?site=", "website": "http://www.sitemeter.com" }, - "SiteCatalyst": { - "cats": [ - 10 - ], - "icon": "SiteCatalyst.png", - "js": { - "s_INST": "", - "s_account": "", - "s_code": "", - "s_objectID": "" - }, - "script": "/s[_-]code.*\\.js", - "website": "http://www.adobe.com/solutions/digital-marketing.html" - }, "SiteEdit": { "cats": [ 1 @@ -10988,14 +11037,47 @@ }, "website": "http://www.seneca.nl/pub/Smartsite/Smartsite-Smartsite-iXperion" }, - "Smartstore": { + "Smartstore biz": { "cats": [ 6 ], - "icon": "Smartstore.png", + "icon": "Smartstore.biz.png", "script": "smjslib\\.js", "website": "http://smartstore.com" - }, + } + , + "Smartstore.NET": { + "cats": [ + 1, + 6 + ], + "cookies": { + "SMARTSTORE.VISITOR": "" + }, + "html": "", + "icon": "smartstore.png", + "implies": "Microsoft ASP.NET", + "meta": { + "generator": "^SmartStore.NET (.+)$\\;version:\\1" + }, + "website": "https://www.smartstore.com" + }, + "Smartstore": { 
+ "cats": [ + 1, + 6 + ], + "cookies": { + "SMARTSTORE.VISITOR": "" + }, + "html": "", + "icon": "Smartstore.png", + "implies": "Microsoft ASP.NET", + "meta": { + "generator": "^Smartstore (.+)$\\;version:\\1" + }, + "website": "https://www.smartstore.com" + }, "Snap": { "cats": [ 18, diff --git a/src/drivers/npm/README.md b/src/drivers/npm/README.md index a2d83e0b3..9bba60c30 100644 --- a/src/drivers/npm/README.md +++ b/src/drivers/npm/README.md @@ -31,6 +31,7 @@ wappalyzer [options] -m, --max-urls=... Exit when num URLs have been analysed -w, --max-wait=... Wait no more than ms milliseconds for page resources to load -P, --pretty Pretty-print JSON output +-p, --probe Perform a deeper scan by requesting common files -r, --recursive Follow links on pages (crawler) -a, --user-agent=... Set the user agent string ``` @@ -59,6 +60,7 @@ const options = { maxUrls: 10, maxWait: 5000, recursive: true, + probe: true, userAgent: 'Wappalyzer', htmlMaxCols: 2000, htmlMaxRows: 2000, @@ -117,3 +119,16 @@ const urls = ['https://www.wappalyzer.com', 'https://www.example.com'] await wappalyzer.destroy() })() ``` + +### Events + +Listen to events with `site.on(eventName, callback)`. Use the `page` parameter to access the Puppeteer page instance ([reference](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#class-page)). 
+ +| Event | Parameters | Description | +|-------------|--------------------------------|------------------------------------------| +| `log` | `message`, `source` | Debug messages | +| `error` | `message`, `source` | Error messages | +| `request` | `page`, `request` | Emitted at the start of a request | +| `response` | `page`, `response` | Emitted upon receiving a server response | +| `goto` | `page`, `url`, `html`, `cookies`, `scripts`, `meta`, `js`, `links` | Emitted after a page has been analysed | +| `analyze` | `urls`, `technologies` | Emitted when the site has been analysed | diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 3bfd47b93..0f979966a 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -1,7 +1,8 @@ const { URL } = require('url') const fs = require('fs') const path = require('path') -const LanguageDetect = require('languagedetect') +const http = require('http') +const https = require('https') const Wappalyzer = require('./wappalyzer') const { @@ -19,7 +20,8 @@ let chromiumArgs = [ '--no-sandbox', '--headless', '--disable-gpu', - '--ignore-certificate-errors' + '--ignore-certificate-errors', + '--disable-web-security' ] let chromiumBin = CHROMIUM_BIN @@ -34,10 +36,6 @@ if (AWS_LAMBDA_FUNCTION_NAME) { puppeteer = require('puppeteer') } -const languageDetect = new LanguageDetect() - -languageDetect.setLanguageType('iso2') - const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ const { apps: technologies, categories } = JSON.parse( @@ -64,6 +62,32 @@ function analyzeJs(js) { ) } +function get(url) { + if (['http:', 'https:'].includes(url.protocol)) { + const { get } = url.protocol === 'http:' ? 
http : https + + return new Promise((resolve, reject) => + get(url.href, (response) => { + if (response.statusCode >= 400) { + return reject( + new Error(`${response.statusCode} ${response.statusMessage}`) + ) + } + + response.setEncoding('utf8') + + let body = '' + + response.on('data', (data) => (body += data)) + response.on('error', (error) => reject(new Error(error.message))) + response.on('end', () => resolve(body)) + }).on('error', (error) => reject(new Error(error.message))) + ) + } else { + throw new Error(`Invalid protocol: ${url.protocol}`) + } +} + class Driver { constructor(options = {}) { this.options = { @@ -74,16 +98,16 @@ class Driver { htmlMaxRows: 3000, maxDepth: 3, maxUrls: 10, - maxWait: 5000, + maxWait: 30000, recursive: false, + probe: false, ...options } this.options.debug = Boolean(+this.options.debug) this.options.recursive = Boolean(+this.options.recursive) - this.options.delay = this.options.recursive - ? parseInt(this.options.delay, 10) - : 0 + this.options.probe = Boolean(+this.options.probe) + this.options.delay = parseInt(this.options.delay, 10) this.options.maxDepth = parseInt(this.options.maxDepth, 10) this.options.maxUrls = parseInt(this.options.maxUrls, 10) this.options.maxWait = parseInt(this.options.maxWait, 10) @@ -161,7 +185,6 @@ class Site { this.analyzedUrls = {} this.detections = [] - this.language = '' this.listeners = {} @@ -191,7 +214,9 @@ class Site { emit(event, params) { if (this.listeners[event]) { - this.listeners[event].forEach((listener) => listener(params)) + return Promise.all( + this.listeners[event].map((listener) => listener(params)) + ) } } @@ -230,15 +255,13 @@ class Site { await page.setRequestInterception(true) - page.on('console', (msg) => console.log('PAGE LOG:', msg._text)) - page.on('dialog', (dialog) => dialog.dismiss()) page.on('error', (error) => this.error(error)) let responseReceived = false - page.on('request', (request) => { + page.on('request', async (request) => { try { if ( (responseReceived && request.isNavigationRequest()) || @@ -252,6 +275,8 @@ 
class Site { ...this.options.headers } + await this.emit('request', { page, request }) + request.continue({ headers }) } } catch (error) { @@ -259,7 +284,7 @@ class Site { } }) - page.on('response', (response) => { + page.on('response', async (response) => { try { if (response.url() === url.href) { this.analyzedUrls[url.href] = { @@ -288,6 +313,8 @@ class Site { responseReceived = true this.onDetect(analyze({ headers })) + + await this.emit('response', { page, response }) } } } catch (error) { @@ -440,34 +467,6 @@ class Site { throw new Error('No response from server') } - if (!this.language) { - this.language = await Promise.race([ - this.timeout(), - ( - await page.evaluateHandle( - () => - document.documentElement.getAttribute('lang') || - document.documentElement.getAttribute('xml:lang') - ) - ).jsonValue() - ]) - } - - if (!this.language) { - try { - const [attrs] = languageDetect.detect( - html.replace(/<\/?[^>]+(>|$)/gs, ' '), - 1 - ) - - if (attrs) { - ;[this.language] = attrs - } - } catch (error) { - this.error(error) - } - } - this.onDetect(analyzeJs(js)) this.onDetect( @@ -503,12 +502,21 @@ class Site { [] ) + await this.emit('goto', { + page, + url, + html, + cookies, + scripts, + meta, + js, + links: reducedLinks + }) + await page.close() this.log('Page closed') - this.emit('goto', url) - return reducedLinks } catch (error) { this.error(error) @@ -517,7 +525,13 @@ class Site { async analyze(url = this.originalUrl, index = 1, depth = 1) { try { - await sleep(this.options.delay * index) + if (this.options.recursive) { + await sleep(this.options.delay * index) + } + + if (this.options.probe) { + await this.probe(url) + } const links = await this.goto(url) @@ -533,7 +547,7 @@ class Site { this.error(error) } - return { + const results = { urls: this.analyzedUrls, technologies: resolve(this.detections).map( ({ @@ -559,9 +573,32 @@ class Site { name })) }) - ), - meta: { - language: this.language + ) + } + + await this.emit('analyze', results) + + return results + 
} + + async probe(url) { + const files = { + robots: '/robots.txt' + } + + for (const file of Object.keys(files)) { + const path = files[file] + + try { + await sleep(this.options.delay) + + const body = await get(new URL(path, url.href)) + + this.log(`get ${path}: ok`) + + this.onDetect(analyze({ [file]: body })) + } catch (error) { + this.error(`get ${path}: ${error.message || error}`) } } } @@ -580,7 +617,7 @@ class Site { await this.batch(links, depth, batch + 1) } - onDetect(detections = [], language) { + onDetect(detections = []) { this.detections = this.detections.concat(detections) this.detections.filter( diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json index 40256719d..0199da6df 100644 --- a/src/drivers/npm/package.json +++ b/src/drivers/npm/package.json @@ -13,7 +13,7 @@ "software" ], "homepage": "https://www.wappalyzer.com", - "version": "6.2.4", + "version": "6.2.6", "author": "Wappalyzer", "license": "MIT", "repository": { @@ -35,7 +35,6 @@ "wappalyzer": "./cli.js" }, "dependencies": { - "languagedetect": "^2.0.0", "puppeteer": "^2.0.0" } -} \ No newline at end of file +} diff --git a/src/drivers/webextension/_locales/ko/messages.json b/src/drivers/webextension/_locales/ko/messages.json new file mode 100644 index 000000000..9e6f2f47e --- /dev/null +++ b/src/drivers/webextension/_locales/ko/messages.json @@ -0,0 +1,89 @@ +{ + "github": { "message": "GitHub에서 Wappalyzer를 포크하세요!" 
}, + "twitter": { "message": "Twitter에서 Wappalyzer 팔로우" }, + "website": { "message": "wappalyzer.com으로 가기" }, + "options": { "message": "설정" }, + "optionsSave": { "message": "설정 저장" }, + "optionsSaved": { "message": "저장됨" }, + "optionUpgradeMessage": { "message": "업그레이드에 대해 알리기" }, + "optionDynamicIcon": { "message": "Wappalyzer 로고 대신 식별된 기술 아이콘을 사용" }, + "optionTracking": { "message": "익명으로 wappalyzer.com에 식별된 기술 정보 전송" }, + "optionThemeMode": { "message": "다크 모드 호환 활성화" }, + "optionBadge": { "message": "아이콘에 식별된 기술 갯수 표시" }, + "disableOnDomain": { "message": "이 웹 사이트에서 끄기" }, + "clearCache": { "message": "캐시된 식별 정보 지우기" }, + "nothingToDo": { "message": "여기에는 할 일이 없네요." }, + "noAppsDetected": { "message": "식별된 기술이 없습니다." }, + "categoryPin": { "message": "항상 아이콘 보이기" }, + "termsAccept": { "message": "수락" }, + "termsContent": { "message": "이 확장 기능은 사이트의 도메인과 식별된 기술을 포함한 익명 정보를 wappalyzer.com에 전송합니다. 이 기능은 설정에서 비활성화 할 수 있습니다." }, + "privacyPolicy": { "message": "개인정보처리방침" }, + "createAlert": { "message": "이 웹 사이트에 대한 알림 받기" }, + "categoryName1": { "message": "CMS" }, + "categoryName2": { "message": "포럼 소프트웨어" }, + "categoryName3": { "message": "데이터베이스 관리 도구" }, + "categoryName4": { "message": "문서 도구" }, + "categoryName5": { "message": "위젯" }, + "categoryName6": { "message": "전자상거래" }, + "categoryName7": { "message": "사진 갤러리" }, + "categoryName8": { "message": "위키" }, + "categoryName9": { "message": "호스팅 패널" }, + "categoryName10": { "message": "분석" }, + "categoryName11": { "message": "블로그" }, + "categoryName12": { "message": "JavaScript 프레임워크" }, + "categoryName13": { "message": "이슈 트래커" }, + "categoryName14": { "message": "비디오 플레이어" }, + "categoryName15": { "message": "댓글 시스템" }, + "categoryName16": { "message": "보안" }, + "categoryName17": { "message": "폰트 스크립트" }, + "categoryName18": { "message": "웹 프레임워크" }, + "categoryName19": { "message": "기타" }, + "categoryName20": { "message": "에디터" }, + "categoryName21": { "message": "LMS" }, + "categoryName22": { "message": 
"웹 서버" }, + "categoryName23": { "message": "캐싱" }, + "categoryName24": { "message": "텍스트 에디터" }, + "categoryName25": { "message": "JavaScript 그래픽" }, + "categoryName26": { "message": "모바일 프레임워크" }, + "categoryName27": { "message": "프로그래밍 언어" }, + "categoryName28": { "message": "운영체제" }, + "categoryName29": { "message": "검색 엔진" }, + "categoryName30": { "message": "웹 메일" }, + "categoryName31": { "message": "CDN" }, + "categoryName32": { "message": "마케팅 자동화" }, + "categoryName33": { "message": "웹 서버 확장" }, + "categoryName34": { "message": "데이터베이스" }, + "categoryName35": { "message": "지도" }, + "categoryName36": { "message": "광고" }, + "categoryName37": { "message": "네트워크 서비스" }, + "categoryName38": { "message": "미디어 서버" }, + "categoryName39": { "message": "웹캠" }, + "categoryName40": { "message": "프린터" }, + "categoryName41": { "message": "결제 처리" }, + "categoryName42": { "message": "Tag managers" }, + "categoryName44": { "message": "CI" }, + "categoryName46": { "message": "원격 접속" }, + "categoryName47": { "message": "개발" }, + "categoryName48": { "message": "네트워크 스토리지" }, + "categoryName49": { "message": "피드 리더" }, + "categoryName50": { "message": "DMS" }, + "categoryName51": { "message": "사이트 제작 도구" }, + "categoryName52": { "message": "실시간 채팅" }, + "categoryName53": { "message": "CRM" }, + "categoryName54": { "message": "SEO" }, + "categoryName55": { "message": "회계" }, + "categoryName56": { "message": "가상화폐 채굴기" }, + "categoryName57": { "message": "정적 사이트 생성기" }, + "categoryName58": { "message": "사용자 유도" }, + "categoryName59": { "message": "JavaScript 라이브러리" }, + "categoryName60": { "message": "컨테이너" }, + "categoryName61": { "message": "SaaS" }, + "categoryName62": { "message": "PaaS" }, + "categoryName63": { "message": "IaaS" }, + "categoryName64": { "message": "리버스 프록시" }, + "categoryName65": { "message": "로드 밸런서" }, + "categoryName66": { "message": "UI 프레임워크" }, + "categoryName67": { "message": "쿠키 동의" }, + "categoryName68": { "message": "접근성"}, + "categoryName69": { 
"message": "소셜 로그인"} +} diff --git a/src/drivers/webextension/images/icons/Adobe Analytics.svg b/src/drivers/webextension/images/icons/Adobe Analytics.svg new file mode 100644 index 000000000..cb2827f2f --- /dev/null +++ b/src/drivers/webextension/images/icons/Adobe Analytics.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/src/drivers/webextension/images/icons/Adobe Experience Manager.svg b/src/drivers/webextension/images/icons/Adobe Experience Manager.svg deleted file mode 100644 index af1c873a1..000000000 --- a/src/drivers/webextension/images/icons/Adobe Experience Manager.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/drivers/webextension/images/icons/Adobe.svg b/src/drivers/webextension/images/icons/Adobe.svg new file mode 100644 index 000000000..0843b200c --- /dev/null +++ b/src/drivers/webextension/images/icons/Adobe.svg @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/src/drivers/webextension/images/icons/Jibres.svg b/src/drivers/webextension/images/icons/Jibres.svg new file mode 100644 index 000000000..7dfe4ca8b --- /dev/null +++ b/src/drivers/webextension/images/icons/Jibres.svg @@ -0,0 +1 @@ +Jibres-Logo-icon-zero \ No newline at end of file diff --git a/src/drivers/webextension/images/icons/SiteCatalyst.png b/src/drivers/webextension/images/icons/SiteCatalyst.png deleted file mode 100644 index 002de3d70..000000000 Binary files a/src/drivers/webextension/images/icons/SiteCatalyst.png and /dev/null differ diff --git a/src/drivers/webextension/images/icons/Smartstore.biz.png b/src/drivers/webextension/images/icons/Smartstore.biz.png new file mode 100644 index 000000000..1c35a5195 Binary files /dev/null and b/src/drivers/webextension/images/icons/Smartstore.biz.png differ diff --git a/src/drivers/webextension/images/icons/Smartstore.png b/src/drivers/webextension/images/icons/Smartstore.png index 1c35a5195..91a231485 100644 Binary files 
a/src/drivers/webextension/images/icons/Smartstore.png and b/src/drivers/webextension/images/icons/Smartstore.png differ diff --git a/src/package.json b/src/package.json index 4c3ce89e7..e9aba8300 100644 --- a/src/package.json +++ b/src/package.json @@ -13,7 +13,7 @@ "software" ], "homepage": "https://www.wappalyzer.com", - "version": "6.2.4", + "version": "6.2.6", "author": "Wappalyzer", "license": "MIT", "repository": { @@ -27,4 +27,4 @@ "files": [ "wappalyzer.js" ] -} \ No newline at end of file +} diff --git a/src/wappalyzer.js b/src/wappalyzer.js index e633667f7..07f87ee9e 100644 --- a/src/wappalyzer.js +++ b/src/wappalyzer.js @@ -180,7 +180,7 @@ const Wappalyzer = { * Initialize analyzation. * @param {*} param0 */ - analyze({ url, html, meta, headers, cookies, scripts }) { + analyze({ url, html, robots, meta, headers, cookies, scripts }) { const oo = Wappalyzer.analyzeOneToOne const om = Wappalyzer.analyzeOneToMany const mm = Wappalyzer.analyzeManyToMany @@ -193,6 +193,7 @@ const Wappalyzer = { flatten([ oo(technology, 'url', url), oo(technology, 'html', html), + oo(technology, 'robots', robots), om(technology, 'scripts', scripts), mm(technology, 'cookies', cookies), mm(technology, 'meta', meta), @@ -219,6 +220,7 @@ const Wappalyzer = { cats, url, html, + robots, meta, headers, cookies, @@ -239,6 +241,7 @@ const Wappalyzer = { headers: transform(headers), cookies: transform(cookies), html: transform(html), + robots: transform(robots), meta: transform(meta), scripts: transform(script), js: transform(js, true),