@ -10,6 +10,12 @@ const json = JSON.parse(fs.readFileSync(path.resolve(__dirname + '/apps.json')))
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ ;
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/ ;
const errorTypes = {
RESPONSE _NOT _OK : 'Response was not ok' ,
NO _RESPONSE : 'No response from server' ,
NO _HTML _DOCUMENT : 'No HTML document' ,
} ;
class Driver {
class Driver {
constructor ( pageUrl , options ) {
constructor ( pageUrl , options ) {
this . options = Object . assign ( { } , {
this . options = Object . assign ( { } , {
@ -38,7 +44,7 @@ class Driver {
this . options . htmlMaxRows = parseInt ( this . options . htmlMaxRows , 10 ) ;
this . options . htmlMaxRows = parseInt ( this . options . htmlMaxRows , 10 ) ;
this . origPageUrl = url . parse ( pageUrl ) ;
this . origPageUrl = url . parse ( pageUrl ) ;
this . analyzedPageUrls = [] ;
this . analyzedPageUrls = {} ;
this . apps = [ ] ;
this . apps = [ ] ;
this . meta = { } ;
this . meta = { } ;
@ -99,22 +105,24 @@ class Driver {
fetch ( pageUrl , index , depth ) {
fetch ( pageUrl , index , depth ) {
// Return when the URL is a duplicate or maxUrls has been reached
// Return when the URL is a duplicate or maxUrls has been reached
if ( this . analyzedPageUrls . indexOf ( pageUrl . href ) !== - 1 || this . analyzedPageUrls . length >= this . options . maxUrls ) {
if ( this . analyzedPageUrls [ pageUrl . href ] || this . analyzedPageUrls . length >= this . options . maxUrls ) {
return Promise . resolve ( ) ;
return Promise . resolve ( ) ;
}
}
this . analyzedPageUrls . push ( pageUrl . href ) ;
this . analyzedPageUrls [ pageUrl . href ] = {
status : 0 ,
} ;
const timerScope = {
const timerScope = {
last : new Date ( ) . getTime ( )
last : new Date ( ) . getTime ( )
} ;
} ;
this . timer ( 'fetch; url: ' + pageUrl . href + '; depth: ' + depth + '; delay: ' + ( this . options . delay * index ) + 'ms' , timerScope ) ;
this . timer ( 'fetch; url: ' + pageUrl . href + '; depth: ' + depth + '; delay: ' + ( this . options . delay * index ) + 'ms' , timerScope ) ;
return new Promise ( resolve => this . sleep ( this . options . delay * index ) . then ( ( ) => this . visit ( pageUrl , timerScope , resolve ) ) ) ;
return new Promise ( ( resolve , reject ) => this . sleep ( this . options . delay * index ) . then ( ( ) => this . visit ( pageUrl , timerScope , resolve , reject ) ) ) ;
}
}
visit ( pageUrl , timerScope , resolve ) {
visit ( pageUrl , timerScope , resolve , reject ) {
const browser = new Browser ( {
const browser = new Browser ( {
proxy : this . options . proxy ,
proxy : this . options . proxy ,
silent : true ,
silent : true ,
@ -133,9 +141,13 @@ class Driver {
browser . visit ( pageUrl . href , ( ) => {
browser . visit ( pageUrl . href , ( ) => {
this . timer ( 'browser.visit end; url: ' + pageUrl . href , timerScope ) ;
this . timer ( 'browser.visit end; url: ' + pageUrl . href , timerScope ) ;
if ( ! this . responseOk ( browser , pageUrl ) ) {
try {
if ( ! this . checkResponse ( browser , pageUrl ) ) {
return resolve ( ) ;
return resolve ( ) ;
}
}
} catch ( error ) {
return reject ( error ) ;
}
const headers = this . getHeaders ( browser ) ;
const headers = this . getHeaders ( browser ) ;
const html = this . getHtml ( browser ) ;
const html = this . getHtml ( browser ) ;
@ -168,20 +180,18 @@ class Driver {
} ) ;
} ) ;
}
}
responseOk ( browser , pageUrl ) {
checkResponse ( browser , pageUrl ) {
// Validate response
// Validate response
const resource = browser . resources . length ? browser . resources . filter ( resource => resource . response ) . shift ( ) : null ;
const resource = browser . resources . length ? browser . resources . filter ( resource => resource . response ) . shift ( ) : null ;
if ( ! resource ) {
if ( ! resource ) {
this . wappalyzer . log ( 'No response from server; url: ' + pageUrl . href , 'driver' , 'error' ) ;
throw new Error ( 'NO_RESPONSE' ) ;
return false ;
}
}
if ( resource . response . status !== 200 ) {
this . analyzedPageUrls [ pageUrl . href ] . status = resource . response . status ;
this . wappalyzer . log ( 'Response was not OK; status: ' + resource . response . status + ' ' + resource . response . statusText + '; url: ' + pageUrl . href , 'driver' , 'error' ) ;
return false ;
if ( resource . response . status !== 200 ) {
throw new Error ( 'RESPONSE_NOT_OK' ) ;
}
}
const headers = this . getHeaders ( browser ) ;
const headers = this . getHeaders ( browser ) ;
@ -192,16 +202,14 @@ class Driver {
if ( ! contentType || ! /\btext\/html\b/ . test ( contentType ) ) {
if ( ! contentType || ! /\btext\/html\b/ . test ( contentType ) ) {
this . wappalyzer . log ( 'Skipping; url: ' + pageUrl . href + '; content type: ' + contentType , 'driver' ) ;
this . wappalyzer . log ( 'Skipping; url: ' + pageUrl . href + '; content type: ' + contentType , 'driver' ) ;
this. analyzedPageUrls . splice ( this . analyzedPageUrls . indexOf ( pageUrl . href ) , 1 ) ;
delete this . analyzedPageUrls [ pageUrl . href ] ;
return false ;
return false ;
}
}
// Validate document
// Validate document
if ( ! browser . document || ! browser . document . documentElement ) {
if ( ! browser . document || ! browser . document . documentElement ) {
this . wappalyzer . log ( 'No HTML document; url: ' + pageUrl . href , 'driver' , 'error' ) ;
throw new Error ( 'NO_HTML_DOCUMENT' ) ;
return false ;
}
}
return true ;
return true ;
@ -234,7 +242,7 @@ class Driver {
. slice ( 0 , this . options . htmlMaxRows / 2 ) . concat ( html . slice ( html . length - this . options . htmlMaxRows / 2 ) )
. slice ( 0 , this . options . htmlMaxRows / 2 ) . concat ( html . slice ( html . length - this . options . htmlMaxRows / 2 ) )
. map ( line => line . substring ( 0 , this . options . htmlMaxCols ) )
. map ( line => line . substring ( 0 , this . options . htmlMaxCols ) )
. join ( '\n' ) ;
. join ( '\n' ) ;
} catch ( error ) {
} catch ( error ) {
this . wappalyzer . log ( error . message , 'browser' , 'error' ) ;
this . wappalyzer . log ( error . message , 'browser' , 'error' ) ;
}
}
@ -303,7 +311,17 @@ class Driver {
return new Promise ( resolve => {
return new Promise ( resolve => {
this . fetch ( pageUrl , index , depth )
this . fetch ( pageUrl , index , depth )
. catch ( ( ) => { } )
. catch ( error => {
const type = error . message && errorTypes [ error . message ] ? error . message : 'UNKNOWN_ERROR' ;
const message = error . message && errorTypes [ error . message ] ? errorTypes [ error . message ] : 'Unknown error' ;
this . analyzedPageUrls [ pageUrl . href ] . error = {
type ,
message ,
} ;
this . wappalyzer . log ( ` ${ message } ; url: ${ pageUrl . href } ` , 'driver' , 'error' ) ;
} )
. then ( links => {
. then ( links => {
if ( links && this . options . recursive && depth < this . options . maxDepth ) {
if ( links && this . options . recursive && depth < this . options . maxDepth ) {
return this . chunk ( links . slice ( 0 , this . options . maxUrls ) , depth + 1 ) ;
return this . chunk ( links . slice ( 0 , this . options . maxUrls ) , depth + 1 ) ;