-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparser.js
109 lines (79 loc) · 2.92 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
// @flow
import cheerio from 'cheerio'
import Promise from 'bluebird'
import R from 'ramda'
import Datauri from 'datauri'
import path from 'path'
const fs = Promise.promisifyAll(require('fs'))
const loadFile = (file): Promise<string> => fs.readFileAsync(file, 'utf8')
const isRemoteUrl = (url: string): boolean=> /^http(s)?:\/\/|^\/\//.test(url)
const extractStyles = ($, dir): ?Array<{inline: boolean, content: string}>=> {
const links = $('link').map((i, el)=> {
const srcAttr = $(el).attr('href')
if (isRemoteUrl(srcAttr)) {
return {inline: false, content: srcAttr}
} else {
const content = fs.readFileSync(`${dir}/${srcAttr}`, 'utf8')
return {inline: true, content: transformCss(content, path.dirname(`${dir}/${srcAttr}`))}
}
}).toArray()
const styles = $('style').map((i, el)=> {
return {inline: true, content: transformCss($(el).html(), dir)}
})
return [...links, ...styles]
}
// References that do not point at a local file and therefore cannot be
// embedded: existing data URIs, http(s) / protocol-relative URLs and
// same-document fragments such as url(#gradient).
const NON_LOCAL_CSS_URL = /^(?:data:|https?:\/\/|\/\/|#)/i

// Rewrites each `url(...)` reference in `cssString` that points at a local
// file into a data URI built from that file, resolved relative to `dir`.
// Non-local references (see NON_LOCAL_CSS_URL) and empty ones are returned
// exactly as written. Returns the rewritten CSS text; Datauri.sync throws if a
// referenced local file cannot be read.
const transformCss = (cssString, dir)=> {
  // `[^)]*` stops at the first closing paren, so several url(...) tokens on
  // the same line are each handled on their own instead of being merged into
  // one greedy match.
  return cssString.replace(/url\(([^)]*)\)/g, (match, rawTarget)=> {
    // Drop optional quotes and the whitespace CSS allows inside the parens
    const target = rawTarget.replace(/["']/g, '').trim()
    if (target === '' || NON_LOCAL_CSS_URL.test(target)) {
      return match
    }
    const dataUri = Datauri.sync(`${dir}/${target}`)
    return `url(${dataUri})`
  })
}
const extractScripts = ($, dir): ?Array<{inline: boolean, content: string}>=> {
return $('script').map((i, el)=> {
const srcAttr = $(el).attr('src')
if (!!srcAttr) {
if (isRemoteUrl(srcAttr)) {
return {inline: false, content: srcAttr}
} else {
const content = fs.readFileSync(`${dir}/${srcAttr}`, 'utf8')
return {inline: true, content: content}
}
} else {
return {inline: true, content: $(el).html()}
}
}).toArray()
}
export default (file: string) => new Promise((resolve, reject)=>
loadFile(file)
.then((fileContent)=> {
const $ = cheerio.load(fileContent)
const dir = path.dirname(file)
// remove tags with [data-remove] from the page
// cheerio remove function is with side effect, the "$" object will be modified as a result
$('[data-remove]').remove()
const styles = extractStyles($, dir)
const scripts = extractScripts($, dir)
// replace images with datauri
$('img').each((i, el)=> {
const srcAttr = $(el).attr('src')
const datauri = $(el).attr('data-datauri')
if (isRemoteUrl(srcAttr) || datauri == "off") {
$(el).attr('src', srcAttr)
} else {
$(el).attr('src', Datauri.sync(`${dir}/${srcAttr}`))
}
})
// remove tags from the page
R.forEach((it)=> {
$(it).remove()
})(['script', 'style', 'link'])
const html = $.html()
const scriptTag = {
js: scripts,
css: styles,
html: html
}
resolve(scriptTag)
})
.catch(reject)
)