diff --git a/README.md b/README.md index 18385ac..9249c7e 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,12 @@ Default: `false` When is `true`, it will rewrite some common mistake related with HTML meta tags. +##### serializeHtml + +It determines how HTML should be serialized before returning. + +It defaults to `$ => ({ html: $.html() })`. + ## License **html-get** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/html-get/blob/master/LICENSE.md) License.
diff --git a/package.json b/package.json index bc59c6d..a5ce976 100644 --- a/package.json +++ b/package.json @@ -71,7 +71,6 @@ "regex-iso-date": "latest", "simple-git-hooks": "latest", "standard": "latest", - "standard-markdown": "latest", "standard-version": "latest" }, "engines": { @@ -85,7 +84,7 @@ "scripts": { "clean": "rm -rf node_modules", "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true", - "lint": "standard-markdown README.md && standard", + "lint": "standard", "postinstall": "node scripts/postinstall", "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)", "pretest": "npm run lint", @@ -118,9 +117,6 @@ "prettier-standard", "standard --fix" ], - "*.md": [ - "standard-markdown" - ], "package.json": [ "finepack" ] diff --git a/src/html.js b/src/html.js index 50ccbe5..9e86116 100644 --- a/src/html.js +++ b/src/html.js @@ -132,24 +132,44 @@ const rewriteHtmlUrls = ({ $, url }) => { }) } -const rewriteCssUrls = ({ html, url }) => { - const cssUrls = Array.from( - execall(cssUrl(), html).reduce((acc, match) => { +const replaceCssUrls = (url, stylesheet) => { + const cssUrls = Array.from(execall(cssUrl(), stylesheet)).reduce( + (acc, match) => { match.subMatches.forEach(match => acc.add(match)) return acc - }, new Set()) + }, + new Set() ) cssUrls.forEach(cssUrl => { if (cssUrl.startsWith('/')) { try { const absoluteUrl = new URL(cssUrl, url).toString() - html = html.replaceAll(`url(${cssUrl})`, `url(${absoluteUrl})`) + stylesheet = stylesheet.replaceAll( + `url(${cssUrl})`, + `url(${absoluteUrl})` + ) } catch (_) {} } }) - return html + return stylesheet +} + +const rewriteCssUrls = ({ $, url }) => { + // Process + $('style').each((_, element) => + $(element).html(replaceCssUrls(url, $(element).html())) + ) + + // Process elements with style attributes + // e.g.,
+ $('[style]').each((_, element) => + $(element).attr('style', replaceCssUrls(url, $(element).attr('style'))) + ) + + return $ } const injectStyle = ({ $, styles }) => @@ -216,7 +236,7 @@ module.exports = ({ if (scripts) injectScripts({ $, scripts, type: 'text/javascript' }) if (modules) injectScripts({ $, modules, type: 'module' }) - return rewriteUrls ? rewriteCssUrls({ html: $.html(), url }) : $.html() + return rewriteUrls ? rewriteCssUrls({ $, url }) : $ } module.exports.getDate = getDate diff --git a/src/index.js b/src/index.js index acd7e66..650bb4e 100644 --- a/src/index.js +++ b/src/index.js @@ -236,14 +236,14 @@ const getContent = PCancelable.fn( onCancel(() => promise.cancel()) return promise.then(content => { - const html = addHtml({ + const $ = addHtml({ ...content, ...(isFetchMode ? puppeteerOpts : undefined), rewriteUrls, rewriteHtml }) - return { ...content, html } + return { ...content, $ } }) } ) @@ -261,8 +261,9 @@ module.exports = PCancelable.fn( mutoolPath = defaultMutoolPath(), prerender = 'auto', puppeteerOpts, + rewriteHtml = false, rewriteUrls = false, - rewriteHtml = false + serializeHtml = $ => ({ html: $.html() }) } = {}, onCancel ) => { @@ -291,9 +292,12 @@ module.exports = PCancelable.fn( onCancel(() => promise.cancel()) - const { mode, ...payload } = await promise + const { mode, $, ...payload } = await promise - return Object.assign(payload, { stats: { mode, timing: duration() } }) + return Object.assign(payload, { + ...serializeHtml($), + stats: { mode, timing: duration() } + }) } ) diff --git a/test/html/index.js b/test/html/index.js index 598e80d..e7bec99 100644 --- a/test/html/index.js +++ b/test/html/index.js @@ -5,7 +5,7 @@ const test = require('ava') const { prettyHtml } = require('../util') -const html = require('../../src/html') +const html = (...args) => require('../../src/html')(...args).html() test('add minimal html markup', t => { const output = html({ diff --git a/test/html/rewrite-css-urls.js 
b/test/html/rewrite-css-urls.js index e1baeca..f1ccc04 100644 --- a/test/html/rewrite-css-urls.js +++ b/test/html/rewrite-css-urls.js @@ -5,7 +5,7 @@ const test = require('ava') const { prettyHtml } = require('../util') -const html = require('../../src/html') +const html = (...args) => require('../../src/html')(...args).html() test("don't modify html markup", t => { const output = html({ @@ -24,18 +24,6 @@ test("don't modify html markup", t => { } }) - t.true( - output.includes( - 'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"' - ) - ) - - t.true( - output.includes( - 'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)' - ) - ) - t.snapshot(prettyHtml(output)) }) diff --git a/test/html/rewrite-html.js b/test/html/rewrite-html.js index 500bdae..d38bfd7 100644 --- a/test/html/rewrite-html.js +++ b/test/html/rewrite-html.js @@ -5,7 +5,7 @@ const cheerio = require('cheerio') const { prettyHtml } = require('../util') -const html = require('../../src/html') +const html = (...args) => require('../../src/html')(...args).html() const composeHtml = meta => prettyHtml(` diff --git a/test/html/rewrite-urls.js b/test/html/rewrite-urls.js index f4e75c8..b17b08c 100644 --- a/test/html/rewrite-urls.js +++ b/test/html/rewrite-urls.js @@ -6,7 +6,7 @@ const fs = require('fs') const { prettyHtml } = require('../util') -const html = require('../../src/html') +const html = (...args) => require('../../src/html')(...args).html() test('remove localhost alike URLs', t => { const output = html({