diff --git a/README.md b/README.md
index 18385ac..9249c7e 100644
--- a/README.md
+++ b/README.md
@@ -158,6 +158,12 @@ Default: `false`
When is `true`, it will rewrite some common mistake related with HTML meta tags.
+##### serializeHtml
+
+It determines how the HTML should be serialized before being returned.
+
+By default, it's serialized using `$ => ({ html: $.html() })`.
+
## License
**html-get** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/html-get/blob/master/LICENSE.md) License.
diff --git a/package.json b/package.json
index bc59c6d..a5ce976 100644
--- a/package.json
+++ b/package.json
@@ -71,7 +71,6 @@
"regex-iso-date": "latest",
"simple-git-hooks": "latest",
"standard": "latest",
- "standard-markdown": "latest",
"standard-version": "latest"
},
"engines": {
@@ -85,7 +84,7 @@
"scripts": {
"clean": "rm -rf node_modules",
"contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
- "lint": "standard-markdown README.md && standard",
+ "lint": "standard",
"postinstall": "node scripts/postinstall",
"postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
"pretest": "npm run lint",
@@ -118,9 +117,6 @@
"prettier-standard",
"standard --fix"
],
- "*.md": [
- "standard-markdown"
- ],
"package.json": [
"finepack"
]
diff --git a/src/html.js b/src/html.js
index 50ccbe5..9e86116 100644
--- a/src/html.js
+++ b/src/html.js
@@ -132,24 +132,44 @@ const rewriteHtmlUrls = ({ $, url }) => {
})
}
-const rewriteCssUrls = ({ html, url }) => {
- const cssUrls = Array.from(
- execall(cssUrl(), html).reduce((acc, match) => {
+const replaceCssUrls = (url, stylesheet) => {
+ const cssUrls = Array.from(execall(cssUrl(), stylesheet)).reduce(
+ (acc, match) => {
match.subMatches.forEach(match => acc.add(match))
return acc
- }, new Set())
+ },
+ new Set()
)
cssUrls.forEach(cssUrl => {
if (cssUrl.startsWith('/')) {
try {
const absoluteUrl = new URL(cssUrl, url).toString()
- html = html.replaceAll(`url(${cssUrl})`, `url(${absoluteUrl})`)
+ stylesheet = stylesheet.replaceAll(
+ `url(${cssUrl})`,
+ `url(${absoluteUrl})`
+ )
} catch (_) {}
}
})
- return html
+ return stylesheet
+}
+
+const rewriteCssUrls = ({ $, url }) => {
+ // Process <style> elements
+ $('style').each((_, element) =>
+ $(element).html(replaceCssUrls(url, $(element).html()))
+ )
+
+ // Process elements with style attributes
+ // e.g., <div style="background: url(/image.png)">
+ $('[style]').each((_, element) =>
+ $(element).attr('style', replaceCssUrls(url, $(element).attr('style')))
+ )
+
+ return $
}
const injectStyle = ({ $, styles }) =>
@@ -216,7 +236,7 @@ module.exports = ({
if (scripts) injectScripts({ $, scripts, type: 'text/javascript' })
if (modules) injectScripts({ $, modules, type: 'module' })
- return rewriteUrls ? rewriteCssUrls({ html: $.html(), url }) : $.html()
+ return rewriteUrls ? rewriteCssUrls({ $, url }) : $
}
module.exports.getDate = getDate
diff --git a/src/index.js b/src/index.js
index acd7e66..650bb4e 100644
--- a/src/index.js
+++ b/src/index.js
@@ -236,14 +236,14 @@ const getContent = PCancelable.fn(
onCancel(() => promise.cancel())
return promise.then(content => {
- const html = addHtml({
+ const $ = addHtml({
...content,
...(isFetchMode ? puppeteerOpts : undefined),
rewriteUrls,
rewriteHtml
})
- return { ...content, html }
+ return { ...content, $ }
})
}
)
@@ -261,8 +261,9 @@ module.exports = PCancelable.fn(
mutoolPath = defaultMutoolPath(),
prerender = 'auto',
puppeteerOpts,
+ rewriteHtml = false,
rewriteUrls = false,
- rewriteHtml = false
+ serializeHtml = $ => ({ html: $.html() })
} = {},
onCancel
) => {
@@ -291,9 +292,12 @@ module.exports = PCancelable.fn(
onCancel(() => promise.cancel())
- const { mode, ...payload } = await promise
+ const { mode, $, ...payload } = await promise
- return Object.assign(payload, { stats: { mode, timing: duration() } })
+ return Object.assign(payload, {
+ ...serializeHtml($),
+ stats: { mode, timing: duration() }
+ })
}
)
diff --git a/test/html/index.js b/test/html/index.js
index 598e80d..e7bec99 100644
--- a/test/html/index.js
+++ b/test/html/index.js
@@ -5,7 +5,7 @@ const test = require('ava')
const { prettyHtml } = require('../util')
-const html = require('../../src/html')
+const html = (...args) => require('../../src/html')(...args).html()
test('add minimal html markup', t => {
const output = html({
diff --git a/test/html/rewrite-css-urls.js b/test/html/rewrite-css-urls.js
index e1baeca..f1ccc04 100644
--- a/test/html/rewrite-css-urls.js
+++ b/test/html/rewrite-css-urls.js
@@ -5,7 +5,7 @@ const test = require('ava')
const { prettyHtml } = require('../util')
-const html = require('../../src/html')
+const html = (...args) => require('../../src/html')(...args).html()
test("don't modify html markup", t => {
const output = html({
@@ -24,18 +24,6 @@ test("don't modify html markup", t => {
}
})
- t.true(
- output.includes(
- 'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"'
- )
- )
-
- t.true(
- output.includes(
- 'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)'
- )
- )
-
t.snapshot(prettyHtml(output))
})
diff --git a/test/html/rewrite-html.js b/test/html/rewrite-html.js
index 500bdae..d38bfd7 100644
--- a/test/html/rewrite-html.js
+++ b/test/html/rewrite-html.js
@@ -5,7 +5,7 @@ const cheerio = require('cheerio')
const { prettyHtml } = require('../util')
-const html = require('../../src/html')
+const html = (...args) => require('../../src/html')(...args).html()
const composeHtml = meta =>
prettyHtml(`
diff --git a/test/html/rewrite-urls.js b/test/html/rewrite-urls.js
index f4e75c8..b17b08c 100644
--- a/test/html/rewrite-urls.js
+++ b/test/html/rewrite-urls.js
@@ -6,7 +6,7 @@ const fs = require('fs')
const { prettyHtml } = require('../util')
-const html = require('../../src/html')
+const html = (...args) => require('../../src/html')(...args).html()
test('remove localhost alike URLs', t => {
const output = html({