|
|
|
@ -459,7 +459,7 @@ class Site {
|
|
|
|
|
promiseTimeout(
|
|
|
|
|
promise,
|
|
|
|
|
fallback,
|
|
|
|
|
errorMessage = 'Operation took too long to respond',
|
|
|
|
|
errorMessage = 'Operation took too long to complete',
|
|
|
|
|
maxWait = this.options.maxWait
|
|
|
|
|
) {
|
|
|
|
|
let timeout = null
|
|
|
|
@ -477,7 +477,13 @@ class Site {
|
|
|
|
|
|
|
|
|
|
error.code = 'PROMISE_TIMEOUT_ERROR'
|
|
|
|
|
|
|
|
|
|
fallback !== undefined ? resolve(fallback) : reject(error)
|
|
|
|
|
if (fallback !== undefined) {
|
|
|
|
|
this.error(error)
|
|
|
|
|
|
|
|
|
|
resolve(fallback)
|
|
|
|
|
} else {
|
|
|
|
|
reject(error)
|
|
|
|
|
}
|
|
|
|
|
}, maxWait)
|
|
|
|
|
}),
|
|
|
|
|
promise.then((value) => {
|
|
|
|
@ -632,8 +638,49 @@ class Site {
|
|
|
|
|
|
|
|
|
|
// page.on('console', (message) => this.log(message.text()))
|
|
|
|
|
|
|
|
|
|
// Cookies
|
|
|
|
|
const cookies = (await page.cookies()).reduce(
|
|
|
|
|
(cookies, { name, value }) => ({
|
|
|
|
|
...cookies,
|
|
|
|
|
[name.toLowerCase()]: [value],
|
|
|
|
|
}),
|
|
|
|
|
{}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// HTML
|
|
|
|
|
let html = await this.promiseTimeout(page.content(), '', 'Timeout (html)')
|
|
|
|
|
|
|
|
|
|
if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
|
|
|
|
|
const batches = []
|
|
|
|
|
const rows = html.length / this.options.htmlMaxCols
|
|
|
|
|
|
|
|
|
|
for (let i = 0; i < rows; i += 1) {
|
|
|
|
|
if (
|
|
|
|
|
i < this.options.htmlMaxRows / 2 ||
|
|
|
|
|
i > rows - this.options.htmlMaxRows / 2
|
|
|
|
|
) {
|
|
|
|
|
batches.push(
|
|
|
|
|
html.slice(
|
|
|
|
|
i * this.options.htmlMaxCols,
|
|
|
|
|
(i + 1) * this.options.htmlMaxCols
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
html = batches.join('\n')
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let links = []
|
|
|
|
|
let css = ''
|
|
|
|
|
let scripts = []
|
|
|
|
|
let meta = []
|
|
|
|
|
let js = []
|
|
|
|
|
let dom = []
|
|
|
|
|
|
|
|
|
|
if (html) {
|
|
|
|
|
// Links
|
|
|
|
|
const links = !this.options.recursive
|
|
|
|
|
links = !this.options.recursive
|
|
|
|
|
? []
|
|
|
|
|
: await this.promiseTimeout(
|
|
|
|
|
(
|
|
|
|
@ -659,7 +706,7 @@ class Site {
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// CSS
|
|
|
|
|
const css = await this.promiseTimeout(
|
|
|
|
|
css = await this.promiseTimeout(
|
|
|
|
|
(
|
|
|
|
|
await this.promiseTimeout(
|
|
|
|
|
page.evaluateHandle((maxRows) => {
|
|
|
|
@ -694,7 +741,7 @@ class Site {
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Script tags
|
|
|
|
|
const scripts = await this.promiseTimeout(
|
|
|
|
|
scripts = await this.promiseTimeout(
|
|
|
|
|
(
|
|
|
|
|
await this.promiseTimeout(
|
|
|
|
|
page.evaluateHandle(() =>
|
|
|
|
@ -711,7 +758,7 @@ class Site {
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Meta tags
|
|
|
|
|
const meta = await this.promiseTimeout(
|
|
|
|
|
meta = await this.promiseTimeout(
|
|
|
|
|
(
|
|
|
|
|
await this.promiseTimeout(
|
|
|
|
|
page.evaluateHandle(() =>
|
|
|
|
@ -738,44 +785,12 @@ class Site {
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// JavaScript
|
|
|
|
|
const js = this.options.noScripts
|
|
|
|
|
js = this.options.noScripts
|
|
|
|
|
? []
|
|
|
|
|
: await this.promiseTimeout(getJs(page), [], 'Timeout (js)')
|
|
|
|
|
|
|
|
|
|
// DOM
|
|
|
|
|
const dom = await this.promiseTimeout(getDom(page), [], 'Timeout (dom)')
|
|
|
|
|
|
|
|
|
|
// Cookies
|
|
|
|
|
const cookies = (await page.cookies()).reduce(
|
|
|
|
|
(cookies, { name, value }) => ({
|
|
|
|
|
...cookies,
|
|
|
|
|
[name.toLowerCase()]: [value],
|
|
|
|
|
}),
|
|
|
|
|
{}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// HTML
|
|
|
|
|
let html = await page.content()
|
|
|
|
|
|
|
|
|
|
if (this.options.htmlMaxCols && this.options.htmlMaxRows) {
|
|
|
|
|
const batches = []
|
|
|
|
|
const rows = html.length / this.options.htmlMaxCols
|
|
|
|
|
|
|
|
|
|
for (let i = 0; i < rows; i += 1) {
|
|
|
|
|
if (
|
|
|
|
|
i < this.options.htmlMaxRows / 2 ||
|
|
|
|
|
i > rows - this.options.htmlMaxRows / 2
|
|
|
|
|
) {
|
|
|
|
|
batches.push(
|
|
|
|
|
html.slice(
|
|
|
|
|
i * this.options.htmlMaxCols,
|
|
|
|
|
(i + 1) * this.options.htmlMaxCols
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
html = batches.join('\n')
|
|
|
|
|
dom = await this.promiseTimeout(getDom(page), [], 'Timeout (dom)')
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this.cache[url.href] = {
|
|
|
|
|