const {
  AWS_LAMBDA_FUNCTION_NAME,
  CHROME_BIN,
} = process.env;

let chromium;
let puppeteer;

// When AWS_LAMBDA_FUNCTION_NAME is set, use the Chromium build and the
// puppeteer instance exposed by chrome-aws-lambda; otherwise use the regular
// puppeteer package.
if (AWS_LAMBDA_FUNCTION_NAME) {
  // eslint-disable-next-line global-require, import/no-unresolved
  chromium = require('chrome-aws-lambda');

  ({ puppeteer } = chromium);
} else {
  // eslint-disable-next-line global-require
  puppeteer = require('puppeteer');
}

const Browser = require('../browser');

/**
 * Build a plain, depth-limited copy of the page's `window` object.
 *
 * This function is handed to `page.evaluate`, so it runs inside the page and
 * must stay self-contained (no references to names from this module).
 *
 * @returns {*} Copy of `window` in which values nested deeper than five
 *   levels, and nested references back to `window`, are replaced by the
 *   string '[Removed]'. Values whose inspection throws become `undefined`.
 */
function getJs() {
  const dereference = (obj, level = 0) => {
    try {
      // eslint-disable-next-line no-undef
      if (level > 5 || (level && obj === window)) {
        return '[Removed]';
      }

      if (Array.isArray(obj)) {
        obj = obj.map(item => dereference(item, level + 1));
      }

      // Arrays also satisfy this check, so a mapped array is copied once more
      // into a plain object keyed by index.
      if (typeof obj === 'function' || (typeof obj === 'object' && obj !== null)) {
        const newObj = {};

        Object.keys(obj).forEach((key) => {
          newObj[key] = dereference(obj[key], level + 1);
        });

        return newObj;
      }

      return obj;
    } catch (error) {
      // Property access can throw (e.g. in getters); drop the value.
      return undefined;
    }
  };

  // eslint-disable-next-line no-undef
  return dereference(window);
}

/**
 * Browser implementation that loads pages with Puppeteer.
 */
class PuppeteerBrowser extends Browser {
  /**
   * @param {Object} options - Options forwarded to the Browser base class.
   *   `options.maxWait` falls back to 60 when falsy; note that the passed
   *   object itself is modified.
   *   NOTE(review): maxWait is passed straight to `setTimeout` and
   *   `page.setDefaultTimeout`, both of which take milliseconds, so the
   *   default of 60 looks very short - confirm the intended unit with callers.
   */
  constructor(options) {
    options.maxWait = options.maxWait || 60;

    super(options);
  }

  /**
   * Load `url` in a fresh browser instance and record what was found.
   *
   * On success this sets `statusCode`, `headers`, `contentType`, `links`,
   * `scripts`, `js`, `cookies` and `html` on the instance. The browser is
   * always closed before the method settles.
   *
   * @param {string} url - Address to navigate to.
   * @returns {Promise<void>}
   * @throws {Error} When launch, navigation (including the maxWait timeout),
   *   a page error, or an unexpected browser disconnect occurs.
   */
  async visit(url) {
    let done = false;
    let browser;

    try {
      // An explicit promise is used because several event handlers below need
      // to be able to fail the visit via `reject`.
      // eslint-disable-next-line no-async-promise-executor
      await new Promise(async (resolve, reject) => {
        try {
          browser = await puppeteer.launch(chromium ? {
            args: [...chromium.args, '--ignore-certificate-errors'],
            defaultViewport: chromium.defaultViewport,
            executablePath: await chromium.executablePath,
            headless: chromium.headless,
          } : {
            args: ['--no-sandbox', '--headless', '--disable-gpu', '--ignore-certificate-errors'],
            executablePath: CHROME_BIN,
          });

          browser.on('disconnected', () => {
            // A disconnect is only an error while the visit is in progress.
            if (!done) {
              reject(new Error('browser: disconnected'));
            }
          });

          const page = await browser.newPage();

          page.setDefaultTimeout(this.options.maxWait * 1.1);

          await page.setRequestInterception(true);

          page.on('error', error => reject(new Error(`page error: ${error.message || error}`)));

          let responseReceived = false;

          page.on('request', (request) => {
            try {
              if (
                responseReceived
                && request.isNavigationRequest()
                && request.frame() === page.mainFrame()
                && request.url() !== url
              ) {
                // Once a non-redirect response has arrived, keep the main
                // frame from navigating away to a different URL.
                this.log(`abort navigation to ${request.url()}`);

                request.abort('aborted');
              } else if (!done) {
                // Only documents and scripts are fetched; everything else is
                // blocked.
                if (!['document', 'script'].includes(request.resourceType())) {
                  request.abort();
                } else {
                  request.continue();
                }
              }
            } catch (error) {
              reject(new Error(`page error: ${error.message || error}`));
            }
          });

          page.on('response', (response) => {
            try {
              // Status and headers are recorded from the first response seen.
              if (!this.statusCode) {
                this.statusCode = response.status();

                this.headers = {};

                const headers = response.headers();

                Object.keys(headers).forEach((key) => {
                  this.headers[key] = Array.isArray(headers[key]) ? headers[key] : [headers[key]];
                });

                this.contentType = headers['content-type'] || null;
              }

              // Redirect responses (3xx) do not count as "received".
              if (response.status() < 300 || response.status() > 399) {
                responseReceived = true;
              }
            } catch (error) {
              reject(new Error(`page error: ${error.message || error}`));
            }
          });

          page.on('console', (message) => {
            // Use the public ConsoleMessage accessors; the underscore-prefixed
            // fields are private and not available in every Puppeteer release.
            const text = message.text();

            if (!/Failed to load resource: net::ERR_FAILED/.test(text)) {
              const { url: sourceUrl, lineNumber } = message.location() || {};

              this.log(`${text} (${sourceUrl}: ${lineNumber})`, message.type());
            }
          });

          if (this.options.userAgent) {
            await page.setUserAgent(this.options.userAgent);
          }

          let timer;

          try {
            await Promise.race([
              page.goto(url, { waitUntil: 'domcontentloaded' }),
              new Promise((_, rejectTimeout) => {
                timer = setTimeout(() => rejectTimeout(new Error('timeout')), this.options.maxWait);
              }),
            ]);
          } catch (error) {
            throw new Error(error.message || error.toString());
          } finally {
            // Do not leave the timeout pending once the race has settled.
            clearTimeout(timer);
          }

          // eslint-disable-next-line no-undef
          const links = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('a')).map(({
            hash, hostname, href, pathname, protocol, rel,
          }) => ({
            hash,
            hostname,
            href,
            pathname,
            protocol,
            rel,
          })));

          this.links = await links.jsonValue();

          // eslint-disable-next-line no-undef
          const scripts = await page.evaluateHandle(() => Array.from(document.getElementsByTagName('script')).map(({
            src,
          }) => src));

          // Inline scripts have an empty `src`; keep external ones only.
          this.scripts = (await scripts.jsonValue()).filter(script => script);

          this.js = await page.evaluate(getJs);

          this.cookies = (await page.cookies()).map(({
            name, value, domain, path,
          }) => ({
            name, value, domain, path,
          }));

          this.html = await page.content();

          resolve();
        } catch (error) {
          reject(new Error(`visit error: ${error.message || error}`));
        }
      });
    } catch (error) {
      this.log(`visit error: ${error.message || error} (${url})`, 'error');

      throw new Error(error.message || error.toString());
    } finally {
      // Mark the visit finished first so the 'disconnected' handler stays
      // quiet while the browser is being closed.
      done = true;

      if (browser) {
        try {
          await browser.close();

          this.log('browser close ok');
        } catch (error) {
          this.log(`browser close error: ${error.message || error}`, 'error');
        }
      }
    }

    this.log(`visit ok (${url})`);
  }
}
// Expose the PuppeteerBrowser class as this module's export.
module.exports = PuppeteerBrowser;