You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

246 lines
6.5 KiB

// Environment inputs: AWS_LAMBDA_FUNCTION_NAME selects the Lambda code path
// below; CHROME_BIN is passed as the Chrome executable path when launching
// outside Lambda.
const { AWS_LAMBDA_FUNCTION_NAME, CHROME_BIN } = process.env
// Stays undefined outside Lambda; visit() tests it to choose launch options.
let chromium
// Puppeteer API object, taken from chrome-aws-lambda or from the puppeteer
// package depending on the branch below.
let puppeteer
if (AWS_LAMBDA_FUNCTION_NAME) {
// eslint-disable-next-line global-require, import/no-unresolved
chromium = require('chrome-aws-lambda')
// The leading semicolon stops the parenthesised destructuring from being
// parsed as a call on the previous line (the file does not use semicolons).
;({ puppeteer } = chromium)
} else {
// eslint-disable-next-line global-require
puppeteer = require('puppeteer')
}
// Base class extended by PuppeteerBrowser.
const Browser = require('../browser')
/**
 * Builds a plain-data snapshot of the global `window` object.
 *
 * The function is handed to `page.evaluate`, so it must be self-contained:
 * everything it needs is declared inside its own body.
 *
 * Snapshot rules:
 * - objects and functions become plain objects of their own enumerable keys;
 * - arrays become plain objects keyed by index (non-index properties are
 *   dropped);
 * - anything deeper than five levels, and any nested reference back to
 *   `window`, is replaced by the string '[Removed]';
 * - a value whose traversal throws (e.g. a throwing getter) becomes
 *   `undefined`.
 *
 * @returns {Object} The dereferenced copy of `window`.
 */
function getJs() {
  const MAX_DEPTH = 5
  const REMOVED = '[Removed]'

  const dereference = (value, level = 0) => {
    try {
      // `level &&` lets the root window through while cutting cycles such as
      // window.self / window.top further down.
      // eslint-disable-next-line no-undef
      if (level > MAX_DEPTH || (level && value === window)) {
        return REMOVED
      }

      if (Array.isArray(value)) {
        // Each element is dereferenced exactly once; the mapped array is then
        // copied into a plain object keyed by index (holes are skipped).
        return Object.assign(
          {},
          value.map((item) => dereference(item, level + 1))
        )
      }

      if (
        typeof value === 'function' ||
        (typeof value === 'object' && value !== null)
      ) {
        const copy = {}

        Object.keys(value).forEach((key) => {
          copy[key] = dereference(value[key], level + 1)
        })

        return copy
      }

      // Primitives (and null) are returned unchanged.
      return value
    } catch (error) {
      // Best effort: an inaccessible value is dropped rather than failing the
      // whole snapshot.
      return undefined
    }
  }

  // eslint-disable-next-line no-undef
  return dereference(window)
}
/**
 * Browser implementation that loads a URL in headless Chrome through
 * Puppeteer and records the response status and headers, links, script
 * sources, a snapshot of the page's globals, cookies and the final HTML on
 * the instance.
 */
class PuppeteerBrowser extends Browser {
  /**
   * @param {Object} options - Passed on to the Browser base class. Note that
   *   the object is modified in place when `maxWait` is falsy.
   * @param {number} [options.maxWait] - Navigation time limit; it is handed
   *   to `setTimeout` and `page.setDefaultTimeout` in visit(), i.e. used as
   *   milliseconds.
   * @param {string} [options.userAgent] - User agent applied to the page when
   *   set.
   */
  constructor(options) {
    // NOTE(review): the fallback of 60 ends up being used as milliseconds in
    // visit() - confirm that is the intended unit.
    options.maxWait = options.maxWait || 60

    super(options)
  }

  /**
   * Opens `url` in a fresh browser, collects page data onto `this`
   * (statusCode, headers, contentType, links, scripts, js, cookies, html) and
   * always closes the browser afterwards.
   *
   * Only 'document' and 'script' requests are allowed through; once a
   * non-redirect response has been seen, main-frame navigations to a
   * different URL are aborted.
   *
   * @param {string} url - Address to load.
   * @returns {Promise<void>} Resolves once the data has been collected and
   *   the browser has been closed.
   * @throws {Error} When launching, navigating (including timeout), or
   *   scraping fails, or when the browser disconnects mid-visit.
   */
  async visit(url) {
    // Flipped in `finally` so late events from the closing browser are ignored.
    let done = false
    let browser

    try {
      // The explicit promise lets event handlers (disconnect, page crash,
      // request/response failures) reject the visit from outside the linear
      // async flow.
      await new Promise(async (resolve, reject) => {
        try {
          browser = await puppeteer.launch(
            chromium
              ? {
                  args: [...chromium.args, '--ignore-certificate-errors'],
                  defaultViewport: chromium.defaultViewport,
                  executablePath: await chromium.executablePath,
                  headless: chromium.headless
                }
              : {
                  args: [
                    '--no-sandbox',
                    '--headless',
                    '--disable-gpu',
                    '--ignore-certificate-errors'
                  ],
                  executablePath: CHROME_BIN
                }
          )

          browser.on('disconnected', () => {
            if (!done) {
              reject(new Error('browser: disconnected'))
            }
          })

          const page = await browser.newPage()

          page.setDefaultTimeout(this.options.maxWait * 1.1)

          await page.setRequestInterception(true)

          page.on('error', (error) =>
            reject(new Error(`page error: ${error.message || error}`))
          )

          // Becomes true after the first response that is not a 3xx redirect.
          let responseReceived = false

          page.on('request', (request) => {
            try {
              if (
                responseReceived &&
                request.isNavigationRequest() &&
                request.frame() === page.mainFrame() &&
                request.url() !== url
              ) {
                // The page tried to navigate away after it had loaded.
                this.log(`abort navigation to ${request.url()}`)

                request.abort('aborted')
              } else if (!done) {
                if (!['document', 'script'].includes(request.resourceType())) {
                  request.abort()
                } else {
                  request.continue()
                }
              }
            } catch (error) {
              reject(new Error(`page error: ${error.message || error}`))
            }
          })

          page.on('response', (response) => {
            try {
              // Only the first response seen populates status and headers.
              if (!this.statusCode) {
                this.statusCode = response.status()

                this.headers = {}

                const headers = response.headers()

                Object.keys(headers).forEach((key) => {
                  // Header values are always stored as arrays.
                  this.headers[key] = Array.isArray(headers[key])
                    ? headers[key]
                    : [headers[key]]
                })

                this.contentType = headers['content-type'] || null
              }

              if (response.status() < 300 || response.status() > 399) {
                responseReceived = true
              }
            } catch (error) {
              reject(new Error(`page error: ${error.message || error}`))
            }
          })

          // Uses the public ConsoleMessage accessors instead of the private
          // underscore-prefixed fields, which are not part of the API.
          page.on('console', (message) => {
            const text = message.text()

            // Blocked resources produce this message for every aborted
            // request; skip it to keep the log readable.
            if (!/Failed to load resource: net::ERR_FAILED/.test(text)) {
              const { url: source, lineNumber } = message.location()

              this.log(`${text} (${source}: ${lineNumber})`, message.type())
            }
          })

          if (this.options.userAgent) {
            await page.setUserAgent(this.options.userAgent)
          }

          let timer

          try {
            await Promise.race([
              page.goto(url, { waitUntil: 'domcontentloaded' }),
              new Promise((_, rejectTimeout) => {
                timer = setTimeout(
                  () => rejectTimeout(new Error('timeout')),
                  this.options.maxWait
                )
              })
            ])
          } catch (error) {
            throw new Error(error.message || error.toString())
          } finally {
            // Without this the pending timer keeps the event loop alive for
            // up to maxWait after navigation has already finished.
            clearTimeout(timer)
          }

          const links = await page.evaluateHandle(() =>
            // eslint-disable-next-line no-undef
            Array.from(document.getElementsByTagName('a')).map(
              ({ hash, hostname, href, pathname, protocol, rel }) => ({
                hash,
                hostname,
                href,
                pathname,
                protocol,
                rel
              })
            )
          )

          this.links = await links.jsonValue()

          const scripts = await page.evaluateHandle(() =>
            // eslint-disable-next-line no-undef
            Array.from(document.getElementsByTagName('script')).map(
              ({ src }) => src
            )
          )

          // Inline scripts have an empty src and are dropped.
          this.scripts = (await scripts.jsonValue()).filter((script) => script)

          this.js = await page.evaluate(getJs)

          this.cookies = (await page.cookies()).map(
            ({ name, value, domain, path }) => ({
              name,
              value,
              domain,
              path
            })
          )

          this.html = await page.content()

          resolve()
        } catch (error) {
          reject(new Error(`visit error: ${error.message || error}`))
        }
      })
    } catch (error) {
      this.log(`visit error: ${error.message || error} (${url})`, 'error')

      throw new Error(error.message || error.toString())
    } finally {
      done = true

      if (browser) {
        try {
          await browser.close()

          this.log('browser close ok')
        } catch (error) {
          // A failed close is logged but does not mask the visit's outcome.
          this.log(`browser close error: ${error.message || error}`, 'error')
        }
      }
    }

    this.log(`visit ok (${url})`)
  }
}
module.exports = PuppeteerBrowser