-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
110 lines (81 loc) · 2.97 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
const puppeteer = require('puppeteer');
const fs = require('fs');
const mkdirp = require('mkdirp');
/**
 * Write `data` to the file at `path` using `fs.writeFile`.
 *
 * @param {string} path - Destination file path.
 * @param {*} data - Content handed to `fs.writeFile` unchanged.
 * @returns {Promise<string>} Resolves with a "[path] has been saved!" message;
 *   rejects with the error reported by `fs.writeFile`.
 */
const writeHtml = (path, data) => new Promise((resolve, reject) => {
  fs.writeFile(path, data, (err) => {
    if (err) {
      reject(err);
      // Stop here so the success branch is never reached on failure.
      return;
    }
    resolve(`[${path}] has been saved!`);
  });
});
/**
 * Wait until every script named in `pendingScripts` has been answered on `page`.
 *
 * A request counts as a pending script when its resource type is `script` or
 * `xhr` and its URL contains the given substring.
 *
 * @param {string[]} pendingScripts - URL substrings of the scripts to wait for.
 * @param {object} page - Page-like event emitter exposing `url()`, `on()` and
 *   `off()`, and emitting `response` / `requestfailed` events.
 * @returns {Promise<Array<undefined>>} Resolves once a response was seen for
 *   each script; rejects with an `Error` as soon as one of them fails to load.
 */
const scriptsLoading = (pendingScripts, page) => {
  if (pendingScripts.length) {
    console.log(`[${page.url()}] waiting for scripts: "${pendingScripts}" ...`);
  }
  return Promise.all(
    pendingScripts.map(script => new Promise((resolve, reject) => {
      const isPendingScript = (request) => (
        ['script', 'xhr'].includes(request.resourceType()) &&
        request.url().includes(script)
      );

      const onResponse = (response) => {
        const request = response.request();
        if (isPendingScript(request)) {
          detach();
          console.log(`[${page.url()}] ${request.url()} loaded.`);
          resolve();
        }
      };

      const onRequestFailed = (request) => {
        if (isPendingScript(request)) {
          detach();
          console.log(`[${page.url()}] Fail to load ${request.url()}.`);
          // Reject with a real Error so callers can safely inspect or print it.
          reject(new Error(`Failed to load ${request.url()}`));
        }
      };

      // Once the promise settles the listeners are useless; drop them so they
      // do not pile up on the page.
      const detach = () => {
        page.off('response', onResponse);
        page.off('requestfailed', onRequestFailed);
      };

      page.on('response', onResponse);
      page.on('requestfailed', onRequestFailed);
    }))
  );
};
/**
 * Remove every element matching one of `clearTags` from the document loaded
 * in `page`.
 *
 * @param {string[]} [clearTags] - Selectors to remove; joined with commas into
 *   a single selector list.
 * @param {object} page - Page-like object exposing `url()` and `evaluateHandle()`.
 * @returns {Promise<*>|undefined} Whatever `page.evaluateHandle` returns, or
 *   `undefined` when there is nothing to clear.
 */
const tagsClearing = (clearTags, page) => {
  if (clearTags && clearTags.length) {
    console.log(`[${page.url()}] clear tags: ${clearTags}`);

    // Runs inside the page context: `document` is the page's document.
    const removeMatching = (selectorList) => {
      for (const node of document.querySelectorAll(selectorList)) {
        node.remove();
      }
    };

    return page.evaluateHandle(removeMatching, clearTags.join());
  }
  return undefined;
};
module.exports = async (urls, dest, options = {}) => {
const browser = await puppeteer.launch({ignoreHTTPSErrors: true});
const {
pendingScripts = [],
clearTags = ['script'],
renderTimeout = 1000
} = options;
const savePage = async (url, path) => {
const page = await browser.newPage();
console.log(`[${url}] loading ...`);
await Promise.all([
scriptsLoading(pendingScripts, page),
page.goto(url, {waitUntil: 'domcontentloaded'})
]);
console.log(`[${url}] waiting for rendering (${renderTimeout / 1000}s) ...`);
await page.waitFor(renderTimeout);
await tagsClearing(clearTags, page);
const html = await page.content();
await page.close();
return writeHtml(path, html);
};
await mkdirp(dest, {mode: 0o755}, err => {
if (err) console.log(err.toString());
});
const promises = urls.map(url => {
const pathname = (new URL(url)).pathname;
const path = dest + (pathname === '/' ? '/index' : pathname) + '.html';
return savePage(url, path);
});
await Promise.all(promises).then(() => {
console.log('Success! All pages saved.');
}).catch(e => {
console.error(e.toString());
});
browser.close();
console.log('Done. Browser closed.');
};