Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add serializeHtml #214

Merged
merged 4 commits into from
Mar 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ Default: `false`

When `true`, it will rewrite some common mistakes related to HTML meta tags.

##### serializeHtml

It determines how the HTML should be serialized before it is returned.

By default it is `$ => ({ html: $.html() })`, so the serialized markup is returned as `html`.

## License

**html-get** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/html-get/blob/master/LICENSE.md) License.<br>
Expand Down
6 changes: 1 addition & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
"regex-iso-date": "latest",
"simple-git-hooks": "latest",
"standard": "latest",
"standard-markdown": "latest",
"standard-version": "latest"
},
"engines": {
Expand All @@ -85,7 +84,7 @@
"scripts": {
"clean": "rm -rf node_modules",
"contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
"lint": "standard-markdown README.md && standard",
"lint": "standard",
"postinstall": "node scripts/postinstall",
"postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
"pretest": "npm run lint",
Expand Down Expand Up @@ -118,9 +117,6 @@
"prettier-standard",
"standard --fix"
],
"*.md": [
"standard-markdown"
],
"package.json": [
"finepack"
]
Expand Down
34 changes: 27 additions & 7 deletions src/html.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,44 @@ const rewriteHtmlUrls = ({ $, url }) => {
})
}

const rewriteCssUrls = ({ html, url }) => {
const cssUrls = Array.from(
execall(cssUrl(), html).reduce((acc, match) => {
const replaceCssUrls = (url, stylesheet) => {
const cssUrls = Array.from(execall(cssUrl(), stylesheet)).reduce(
(acc, match) => {
match.subMatches.forEach(match => acc.add(match))
return acc
}, new Set())
},
new Set()
)

cssUrls.forEach(cssUrl => {
if (cssUrl.startsWith('/')) {
try {
const absoluteUrl = new URL(cssUrl, url).toString()
html = html.replaceAll(`url(${cssUrl})`, `url(${absoluteUrl})`)
stylesheet = stylesheet.replaceAll(
`url(${cssUrl})`,
`url(${absoluteUrl})`
)
} catch (_) {}
}
})

return html
return stylesheet
}

/**
 * Run every piece of inline CSS in the document through `replaceCssUrls`,
 * writing the result back in place.
 *
 * Both `<style>` elements and `style` attributes are covered. The document
 * is mutated and the same `$` handle is returned, so the caller decides how
 * to serialize it.
 *
 * @param {object} options
 * @param {Function} options.$ - queryable document handle (`$(selector)`).
 * @param {string} options.url - base URL handed to `replaceCssUrls`.
 * @returns {Function} the same `$` that was passed in.
 */
const rewriteCssUrls = ({ $, url }) => {
  const absolutize = stylesheet => replaceCssUrls(url, stylesheet)

  // <style> blocks,
  // e.g., <style>body { background-image: url('/image.jpg'); }</style>
  $('style').each((_, node) => {
    const styleTag = $(node)
    styleTag.html(absolutize(styleTag.html()))
  })

  // Inline `style` attributes,
  // e.g., <div style="background-image: url('/image.jpg');"></div>
  $('[style]').each((_, node) => {
    const styled = $(node)
    styled.attr('style', absolutize(styled.attr('style')))
  })

  return $
}

const injectStyle = ({ $, styles }) =>
Expand Down Expand Up @@ -216,7 +236,7 @@ module.exports = ({
if (scripts) injectScripts({ $, scripts, type: 'text/javascript' })
if (modules) injectScripts({ $, modules, type: 'module' })

return rewriteUrls ? rewriteCssUrls({ html: $.html(), url }) : $.html()
return rewriteUrls ? rewriteCssUrls({ $, url }) : $
}

module.exports.getDate = getDate
14 changes: 9 additions & 5 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,14 @@ const getContent = PCancelable.fn(
onCancel(() => promise.cancel())

return promise.then(content => {
const html = addHtml({
const $ = addHtml({
...content,
...(isFetchMode ? puppeteerOpts : undefined),
rewriteUrls,
rewriteHtml
})

return { ...content, html }
return { ...content, $ }
})
}
)
Expand All @@ -261,8 +261,9 @@ module.exports = PCancelable.fn(
mutoolPath = defaultMutoolPath(),
prerender = 'auto',
puppeteerOpts,
rewriteHtml = false,
rewriteUrls = false,
rewriteHtml = false
serializeHtml = $ => ({ html: $.html() })
} = {},
onCancel
) => {
Expand Down Expand Up @@ -291,9 +292,12 @@ module.exports = PCancelable.fn(

onCancel(() => promise.cancel())

const { mode, ...payload } = await promise
const { mode, $, ...payload } = await promise

return Object.assign(payload, { stats: { mode, timing: duration() } })
return Object.assign(payload, {
...serializeHtml($),
stats: { mode, timing: duration() }
})
}
)

Expand Down
2 changes: 1 addition & 1 deletion test/html/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const test = require('ava')

const { prettyHtml } = require('../util')

const html = require('../../src/html')
const html = (...args) => require('../../src/html')(...args).html()

test('add minimal html markup', t => {
const output = html({
Expand Down
14 changes: 1 addition & 13 deletions test/html/rewrite-css-urls.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const test = require('ava')

const { prettyHtml } = require('../util')

const html = require('../../src/html')
const html = (...args) => require('../../src/html')(...args).html()

test("don't modify html markup", t => {
const output = html({
Expand All @@ -24,18 +24,6 @@ test("don't modify html markup", t => {
}
})

t.true(
output.includes(
'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"'
)
)

t.true(
output.includes(
'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)'
)
)

t.snapshot(prettyHtml(output))
})

Expand Down
2 changes: 1 addition & 1 deletion test/html/rewrite-html.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const cheerio = require('cheerio')

const { prettyHtml } = require('../util')

const html = require('../../src/html')
const html = (...args) => require('../../src/html')(...args).html()

const composeHtml = meta =>
prettyHtml(`
Expand Down
2 changes: 1 addition & 1 deletion test/html/rewrite-urls.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const fs = require('fs')

const { prettyHtml } = require('../util')

const html = require('../../src/html')
const html = (...args) => require('../../src/html')(...args).html()

test('remove localhost alike URLs', t => {
const output = html({
Expand Down
Loading