@ -283,7 +283,11 @@ function analyzeDom(dom, technologies = Wappalyzer.technologies) {
}
}
function get ( url , options = { } ) {
function get ( url , options = { } ) {
const timeout = options . timeout || 10000
// Per-request timeout in ms: an explicit options.timeout wins; otherwise fall
// back to maxWait, capped at 3000 ms when the `fast` option is set.
// `Math` is the global object — it is not a property of `this`.
// NOTE(review): `get` is declared as a plain function, so `this.options` only
// resolves if callers bind `this` when invoking it — confirm, or make sure
// every caller passes options.timeout.
const timeout =
  options.timeout ||
  (this.options.fast
    ? Math.min(this.options.maxWait, 3000)
    : this.options.maxWait)
if ( [ 'http:' , 'https:' ] . includes ( url . protocol ) ) {
if ( [ 'http:' , 'https:' ] . includes ( url . protocol ) ) {
const { get } = url . protocol === 'http:' ? http : https
const { get } = url . protocol === 'http:' ? http : https
@ -314,7 +318,7 @@ function get(url, options = {}) {
}
}
)
)
. setTimeout ( timeout , ( ) =>
. setTimeout ( timeout , ( ) =>
reject ( new Error ( ` Timeout ( ${ url . href } , ${ timeout } ms) ` ) )
reject ( new Error ( ` Timeout ( ${ url } , ${ timeout } ms) ` ) )
)
)
. on ( 'error' , ( error ) => reject ( new Error ( error . message ) ) )
. on ( 'error' , ( error ) => reject ( new Error ( error . message ) ) )
)
)
@ -345,6 +349,7 @@ class Driver {
}
}
this . options . debug = Boolean ( + this . options . debug )
this . options . debug = Boolean ( + this . options . debug )
this . options . fast = Boolean ( + this . options . fast )
this . options . recursive = Boolean ( + this . options . recursive )
this . options . recursive = Boolean ( + this . options . recursive )
this . options . probe =
this . options . probe =
String ( this . options . probe || '' ) . toLowerCase ( ) === 'basic'
String ( this . options . probe || '' ) . toLowerCase ( ) === 'basic'
@ -369,7 +374,7 @@ class Driver {
}
}
async init ( ) {
async init ( ) {
for ( let attempt = 1 ; attempt <= 3 ; attempt ++ ) {
for ( let attempt = 1 ; attempt <= 2 ; attempt ++ ) {
this . log ( ` Launching browser (attempt ${ attempt } )... ` )
this . log ( ` Launching browser (attempt ${ attempt } )... ` )
try {
try {
@ -385,7 +390,9 @@ class Driver {
acceptInsecureCerts : true ,
acceptInsecureCerts : true ,
args : chromiumArgs ,
args : chromiumArgs ,
executablePath : CHROMIUM _BIN ,
executablePath : CHROMIUM _BIN ,
timeout : 5000 ,
timeout : this . options . fast
? Math . min ( this . options . maxWait , 10000 )
: this . options . maxWait ,
} )
} )
}
}
@ -393,28 +400,20 @@ class Driver {
} catch ( error ) {
} catch ( error ) {
this . log ( error )
this . log ( error )
if ( attempt >= 3 ) {
if ( attempt >= 2 ) {
throw new Error ( error . message || error . toString ( ) )
throw new Error ( error . message || error . toString ( ) )
}
}
}
}
}
}
this . browser . on ( 'disconnected' , async ( ) => {
this . browser . on ( 'disconnected' , ( ) => {
this . log( 'Browser disconnected' )
this . browser = undefined
if ( ! this . destroyed ) {
this . log ( 'Browser disconnected' )
try {
await this . init ( )
} catch ( error ) {
this . log ( error )
}
}
} )
} )
}
}
async destroy ( ) {
async destroy ( ) {
this . destroyed = true
if ( this . browser ) {
if ( this . browser ) {
try {
try {
await sleep ( 1 )
await sleep ( 1 )
@ -507,8 +506,6 @@ class Site {
this . cache = { }
this . cache = { }
this . probed = false
this . probed = false
this . destroyed = false
}
}
log ( message , source = 'driver' , type = 'log' ) {
log ( message , source = 'driver' , type = 'log' ) {
@ -544,7 +541,9 @@ class Site {
promise ,
promise ,
fallback ,
fallback ,
errorMessage = 'Operation took too long to complete' ,
errorMessage = 'Operation took too long to complete' ,
maxWait = Math . min ( this . options . maxWait , 3000 )
maxWait = this . options . fast
? Math . min ( this . options . maxWait , 2000 )
: this . options . maxWait
) {
) {
let timeout = null
let timeout = null
@ -579,10 +578,6 @@ class Site {
}
}
async goto ( url ) {
async goto ( url ) {
if ( this . destroyed ) {
return
}
// Return when the URL is a duplicate or maxUrls has been reached
// Return when the URL is a duplicate or maxUrls has been reached
if ( this . analyzedUrls [ url . href ] ) {
if ( this . analyzedUrls [ url . href ] ) {
return [ ]
return [ ]
@ -640,14 +635,18 @@ class Site {
) {
) {
request . abort ( 'blockedbyclient' )
request . abort ( 'blockedbyclient' )
} else {
} else {
await this . emit ( 'request' , { page , request } )
if ( Object . keys ( this . options . headers ) . length ) {
const headers = {
const headers = {
... request . headers ( ) ,
... request . headers ( ) ,
... this . options . headers ,
... this . options . headers ,
}
}
await this . emit ( 'request' , { page , request } )
request . continue ( { headers } )
request . continue ( { headers } )
} else {
request . continue ( )
}
}
}
} catch ( error ) {
} catch ( error ) {
error . message += ` ( ${ url } ) `
error . message += ` ( ${ url } ) `
@ -657,7 +656,7 @@ class Site {
} )
} )
page . on ( 'response' , async ( response ) => {
page . on ( 'response' , async ( response ) => {
if ( this . destroyed || ! page || page . _ _closed || page . isClosed ( ) ) {
if ( ! page || page . _ _closed || page . isClosed ( ) ) {
return
return
}
}
@ -745,7 +744,7 @@ class Site {
}
}
if ( ! this . options . noScripts ) {
if ( ! this . options . noScripts ) {
await sleep ( 1 000)
await sleep ( this . options . fast ? 1 000 : 3 000)
}
}
// page.on('console', (message) => this.log(message.text()))
// page.on('console', (message) => this.log(message.text()))
@ -810,6 +809,8 @@ class Site {
let dom = [ ]
let dom = [ ]
if ( html ) {
if ( html ) {
await Promise . all ( [
( async ( ) => {
// Links
// Links
links = ! this . options . recursive
links = ! this . options . recursive
? [ ]
? [ ]
@ -818,7 +819,14 @@ class Site {
await this . promiseTimeout (
await this . promiseTimeout (
page . evaluateHandle ( ( ) =>
page . evaluateHandle ( ( ) =>
Array . from ( document . getElementsByTagName ( 'a' ) ) . map (
Array . from ( document . getElementsByTagName ( 'a' ) ) . map (
( { hash , hostname , href , pathname , protocol , rel } ) => ( {
( {
hash ,
hostname ,
href ,
pathname ,
protocol ,
rel ,
} ) => ( {
hash ,
hash ,
hostname ,
hostname ,
href ,
href ,
@ -835,7 +843,8 @@ class Site {
[ ] ,
[ ] ,
'Timeout (links)'
'Timeout (links)'
)
)
} ) ( ) ,
( async ( ) => {
// Text
// Text
text = await this . promiseTimeout (
text = await this . promiseTimeout (
(
(
@ -852,7 +861,8 @@ class Site {
'' ,
'' ,
'Timeout (text)'
'Timeout (text)'
)
)
} ) ( ) ,
( async ( ) => {
// CSS
// CSS
css = await this . promiseTimeout (
css = await this . promiseTimeout (
(
(
@ -887,7 +897,8 @@ class Site {
'' ,
'' ,
'Timeout (css)'
'Timeout (css)'
)
)
} ) ( ) ,
( async ( ) => {
// Script tags
// Script tags
; [ scriptSrc , scripts ] = await this . promiseTimeout (
; [ scriptSrc , scripts ] = await this . promiseTimeout (
(
(
@ -916,7 +927,8 @@ class Site {
[ ] ,
[ ] ,
'Timeout (scripts)'
'Timeout (scripts)'
)
)
} ) ( ) ,
( async ( ) => {
// Meta tags
// Meta tags
meta = await this . promiseTimeout (
meta = await this . promiseTimeout (
(
(
@ -925,10 +937,12 @@ class Site {
Array . from ( document . querySelectorAll ( 'meta' ) ) . reduce (
Array . from ( document . querySelectorAll ( 'meta' ) ) . reduce (
( metas , meta ) => {
( metas , meta ) => {
const key =
const key =
meta . getAttribute ( 'name' ) || meta . getAttribute ( 'property' )
meta . getAttribute ( 'name' ) ||
meta . getAttribute ( 'property' )
if ( key ) {
if ( key ) {
metas [ key . toLowerCase ( ) ] = metas [ key . toLowerCase ( ) ] || [ ]
metas [ key . toLowerCase ( ) ] =
metas [ key . toLowerCase ( ) ] || [ ]
metas [ key . toLowerCase ( ) ] . push (
metas [ key . toLowerCase ( ) ] . push (
meta . getAttribute ( 'content' )
meta . getAttribute ( 'content' )
@ -947,14 +961,18 @@ class Site {
[ ] ,
[ ] ,
'Timeout (meta)'
'Timeout (meta)'
)
)
} ) ( ) ,
( async ( ) => {
// JavaScript
// JavaScript
js = this . options . noScripts
js = this . options . noScripts
? [ ]
? [ ]
: await this . promiseTimeout ( getJs ( page ) , [ ] , 'Timeout (js)' )
: await this . promiseTimeout ( getJs ( page ) , [ ] , 'Timeout (js)' )
} ) ( ) ,
( async ( ) => {
// DOM
// DOM
dom = await this . promiseTimeout ( getDom ( page ) , [ ] , 'Timeout (dom)' )
dom = await this . promiseTimeout ( getDom ( page ) , [ ] , 'Timeout (dom)' )
} ) ( ) ,
] )
}
}
this . cache [ url . href ] = {
this . cache [ url . href ] = {
@ -1037,7 +1055,9 @@ class Site {
}
}
if ( error . message . includes ( 'net::ERR_NAME_NOT_RESOLVED' ) ) {
if ( error . message . includes ( 'net::ERR_NAME_NOT_RESOLVED' ) ) {
const newError = new Error ( ` Hostname could not be resolved ( ${ url } ) ` )
const newError = new Error (
` Hostname could not be resolved ( ${ url . hostname } ) `
)
newError . code = 'WAPPALYZER_DNS_ERROR'
newError . code = 'WAPPALYZER_DNS_ERROR'
@ -1253,7 +1273,9 @@ class Site {
} ) ,
} ) ,
[ ] ,
[ ] ,
'Timeout (dns)' ,
'Timeout (dns)' ,
Math . min ( this . options . maxWait , 15000 )
this . options . fast
? Math . min ( this . options . maxWait , 15000 )
: this . options . maxWait
)
)
}
}
@ -1452,8 +1474,6 @@ class Site {
} )
} )
)
)
this . destroyed = true
this . log ( 'Site closed' )
this . log ( 'Site closed' )
}
}
}
}