@ -1,4 +1,4 @@
const os = require ( 'os' )
// const os = require('os')
const fs = require ( 'fs' )
const dns = require ( 'dns' ) . promises
const path = require ( 'path' )
@ -22,10 +22,6 @@ const chromiumArgs = [
` --user-data-dir= ${ CHROMIUM _DATA _DIR || '/tmp/chromium' } ` ,
]
if ( os . arch ( ) === 'arm64' ) {
chromiumArgs . push ( '--single-process' )
}
const extensions = /^([^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/
const categories = JSON . parse (
@ -400,6 +396,8 @@ class Site {
this . cache = { }
this . probed = false
this . destroyed = false
}
log ( message , source = 'driver' , type = 'log' ) {
@ -425,7 +423,7 @@ class Site {
emit ( event , params ) {
if ( this . listeners [ event ] ) {
return Promise . all (
return Promise . all Settled (
this . listeners [ event ] . map ( ( listener ) => listener ( params ) )
)
}
@ -435,7 +433,7 @@ class Site {
promise ,
fallback ,
errorMessage = 'Operation took too long to complete' ,
maxWait = this . options . maxWait
maxWait = Math . min ( this . options . maxWait , 1000 )
) {
let timeout = null
@ -470,12 +468,16 @@ class Site {
}
async goto ( url ) {
if ( this . destroyed ) {
return
}
// Return when the URL is a duplicate or maxUrls has been reached
if ( this . analyzedUrls [ url . href ] ) {
return [ ]
}
this . log ( ` Navigate to ${ url } ` , 'page' )
this . log ( ` Navigate to ${ url } ` )
this . analyzedUrls [ url . href ] = {
status : 0 ,
@ -493,7 +495,13 @@ class Site {
try {
page = await this . browser . newPage ( )
if ( ! page || page . isClosed ( ) ) {
throw new Error ( 'Page did not open' )
}
} catch ( error ) {
error . message += ` ( ${ url } ) `
this . error ( error )
await this . initDriver ( )
@ -509,9 +517,15 @@ class Site {
await page . setRequestInterception ( true )
await page . setUserAgent ( this . options . userAgent )
page . on ( 'dialog' , ( dialog ) => dialog . dismiss ( ) )
page . on ( 'error' , ( error ) => this . error ( error ) )
page . on ( 'error' , ( error ) => {
error . message += ` ( ${ url } ) `
this . error ( error )
} )
let responseReceived = false
@ -523,6 +537,8 @@ class Site {
try {
; ( { hostname } = new URL ( request . url ( ) ) )
} catch ( error ) {
request . abort ( 'blockedbyclient' )
return
}
@ -563,11 +579,17 @@ class Site {
request . continue ( { headers } )
}
} catch ( error ) {
error . message += ` ( ${ url } ) `
this . error ( error )
}
} )
page . on ( 'response' , async ( response ) => {
if ( this . destroyed || ! page || page . _ _closed || page . isClosed ( ) ) {
return
}
try {
if (
response . status ( ) < 300 &&
@ -578,7 +600,15 @@ class Site {
await this . onDetect ( response . url ( ) , analyze ( { scripts } ) )
}
} catch ( error ) {
if ( error . constructor . name !== 'ProtocolError' ) {
error . message += ` ( ${ url } ) `
this . error ( error )
}
}
try {
if ( response . url ( ) === url . href ) {
this . analyzedUrls [ url . href ] = {
status : response . status ( ) ,
@ -625,26 +655,21 @@ class Site {
await this . emit ( 'response' , { page , response , headers , certIssuer } )
}
} catch ( error ) {
error . message += ` ( ${ url } ) `
this . error ( error )
}
} )
await page . setUserAgent ( this . options . userAgent )
try {
try {
await this . promiseTimeout ( page . goto ( url . href ) )
} catch ( error ) {
if (
error . constructor . name !== 'TimeoutError' &&
error . code !== 'PROMISE_TIMEOUT_ERROR'
) {
throw error
}
}
await page . goto ( url . href )
if ( page . url ( ) === 'about:blank' ) {
throw new Error ( 'The website failed to load' )
const error = new Error ( ` The page failed to load ( ${ url } ) ` )
error . code = 'WAPPALYZER_PAGE_EMPTY'
throw error
}
if ( ! this . options . noScripts ) {
@ -665,6 +690,8 @@ class Site {
{ }
)
} catch ( error ) {
error . message += ` ( ${ url } ) `
this . error ( error )
}
@ -906,65 +933,57 @@ class Site {
... this . cache [ url . href ] ,
} )
await page . close ( )
this . log ( ` Page closed ( ${ url } ) ` )
page . _ _closed = true
return reducedLinks
} catch ( error ) {
try {
await page . close ( )
this . log ( ` Page closed ( ${ url } ) ` )
} catch ( error ) {
this . log ( error )
// Continue
}
let hostname = url
return reducedLinks
} catch ( error ) {
page . _ _closed = true
try {
; ( { hostname } = new URL ( url ) )
await page . close ( )
this . log ( ` Page closed ( ${ url } ) ` )
} catch ( error ) {
// Continue
}
if (
error . constructor . name === 'TimeoutError' ||
error . code === 'PROMISE_TIMEOUT_ERROR'
) {
const newError = new Error (
` The website took too long to respond: ${
error . message || error
} at $ { hostname } `
)
newError . code = 'WAPPALYZER_TIMEOUT_ERROR'
throw newError
}
if ( error . message . includes ( 'net::ERR_NAME_NOT_RESOLVED' ) ) {
const newError = new Error (
` Hostname could not be resolved at ${ hostname } `
)
const newError = new Error ( ` Hostname could not be resolved ( ${ url } ) ` )
newError . code = 'WAPPALYZER_DNS_ERROR'
throw newError
}
if (
error . constructor . name === 'TimeoutError' ||
error . code === 'PROMISE_TIMEOUT_ERROR'
) {
error . code = 'WAPPALYZER_TIMEOUT_ERROR'
}
error . message += ` ( ${ url } ) `
throw error
}
}
async analyze ( url = this . originalUrl , index = 1 , depth = 1 ) {
try {
if ( this . options . recursive ) {
await sleep ( this . options . delay * index )
}
if ( this . options . recursive ) {
await sleep ( this . options . delay * index )
}
await Promise . all ( [
( async ( ) => {
await Promise . allSettled ( [
( async ( ) => {
try {
const links = ( ( await this . goto ( url ) ) || [ ] ) . filter (
( { href } ) => ! this . analyzedUrls [ href ]
)
@ -983,23 +1002,25 @@ class Site {
depth + 1
)
}
} ) ( ) ,
( async ( ) => {
if ( this . options . probe && ! this . probed ) {
this . probed = true
await this . probe ( url )
} catch ( error ) {
this . analyzedUrls [ url . href ] = {
status : this . analyzedUrls [ url . href ] ? . status || 0 ,
error : error . message || error . toString ( ) ,
}
} ) ( ) ,
] )
} catch ( error ) {
this . analyzedUrls [ url . href ] = {
status : this . analyzedUrls [ url . href ] ? . status || 0 ,
error : error . message || error . toString ( ) ,
}
this . error ( error )
}
error . message += ` ( ${ url } ) `
this . error ( error )
}
} ) ( ) ,
( async ( ) => {
if ( this . options . probe && ! this . probed ) {
this . probed = true
await this . probe ( url )
}
} ) ( ) ,
] )
const patterns = this . options . extended
? this . detections . reduce (
@ -1076,6 +1097,8 @@ class Site {
return this . promiseTimeout (
func ( hostname ) . catch ( ( error ) => {
if ( error . code !== 'ENODATA' ) {
error . message += ` ( ${ url } ) `
this . error ( error )
}
@ -1089,7 +1112,7 @@ class Site {
const domain = url . hostname . replace ( /^www\./ , '' )
await Promise . all ( [
await Promise . all Settled ( [
// Static files
... Object . keys ( files ) . map ( async ( file , index ) => {
const path = files [ file ]
@ -1099,7 +1122,7 @@ class Site {
const body = await get ( new URL ( path , url . href ) , {
userAgent : this . options . userAgent ,
timeout : Math . min ( this . options . maxWait , 3 000) ,
timeout : Math . min ( this . options . maxWait , 1 000) ,
} )
this . log ( ` Probe ok ( ${ path } ) ` )
@ -1156,7 +1179,7 @@ class Site {
const batched = links . splice ( 0 , this . options . batchSize )
await Promise . all (
await Promise . all Settled (
batched . map ( ( link , index ) => this . analyze ( link , index , depth ) )
)
@ -1189,7 +1212,7 @@ class Site {
) ,
]
await Promise . all (
await Promise . all Settled (
requires . map ( async ( { name , categoryId , technologies } ) => {
const id = categoryId
? ` category: ${ categoryId } `
@ -1242,9 +1265,11 @@ class Site {
}
async destroy ( ) {
await Promise . all (
await Promise . all Settled (
this . pages . map ( async ( page ) => {
if ( page ) {
page . _ _closed = true
try {
await page . close ( )
} catch ( error ) {
@ -1254,6 +1279,8 @@ class Site {
} )
)
this . destroyed = true
this . log ( 'Site closed' )
}
}