Skip to content

Commit

Permalink
feat(html): better url rewrite (#213)
Browse files: browse the repository at this point in the history.
* chore(html): better url rewrite

* ci: enable puppeteer

* test: update snapshot

* fix: implementation

* test: update snapshot
  • Loading branch information
Kikobeats authored Feb 28, 2025
1 parent d68398a commit e261c9c
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 12 deletions.
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"html-encode": "~2.1.7",
"html-urls": "~2.4.62",
"is-html-content": "~1.0.0",
"localhost-url-regex": "~1.0.13",
"is-local-address": "~2.2.0",
"lodash": "~4.17.21",
"mri": "~1.2.0",
"p-cancelable": "~2.1.0",
Expand Down Expand Up @@ -125,6 +125,9 @@
"finepack"
]
},
"pnpm": {
"neverBuiltDependencies": []
},
"simple-git-hooks": {
"commit-msg": "npx commitlint --edit",
"pre-commit": "npx nano-staged"
Expand Down
21 changes: 11 additions & 10 deletions src/html.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

const { get, split, nth, castArray, forEach } = require('lodash')
const debug = require('debug-logfmt')('html-get:rewrite')
const localhostUrl = require('localhost-url-regex')
const isLocalAddress = require('is-local-address')
const { TAGS: URL_TAGS } = require('html-urls')
const isHTML = require('is-html-content')
const cssUrl = require('css-url-regex')
Expand Down Expand Up @@ -118,15 +118,16 @@ const rewriteHtmlUrls = ({ $, url }) => {
$(tagName.join(',')).each(function () {
const el = $(this)
const attr = el.attr(urlAttr)

if (localhostUrl().test(attr)) {
el.remove()
} else if (typeof attr === 'string' && !attr.startsWith('http')) {
try {
const newAttr = new URL(attr, url).toString()
el.attr(urlAttr, newAttr)
} catch (_) {}
}
if (typeof attr !== 'string') return
try {
const urlObj = new URL(attr, url)
if (!urlObj.protocol.startsWith('http')) return
if (isLocalAddress(urlObj.hostname)) {
el.remove()
} else {
el.attr(urlAttr, urlObj.toString())
}
} catch (_) {}
})
})
}
Expand Down
4 changes: 4 additions & 0 deletions test/html/rewrite-urls.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@ test('remove localhost alike URLs', t => {
<meta charset="utf-8">
</head>
<body>
<script async="" src="/testfile" id="livereloadscript"></script>
<script async="" src="http://localhost:35729/livereload.js?snipver=1" id="livereloadscript"></script>
<script async="" src="http://127.0.0.1:35729/livereload.js?snipver=1" id="livereloadscript"></script>
<script async="" src="http://0.0.0.1:35729/livereload.js?snipver=1" id="livereloadscript"></script>
<a href="mailto:[email protected]">Email</a>
<a href="ftp://example.com/file.txt">FTP Link</a>
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Base64 Image">
</body>
</html>`,
headers: { 'content-type': 'text/html; charset=utf-8' }
Expand Down
6 changes: 5 additions & 1 deletion test/html/snapshots/rewrite-urls.js.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Generated by [AVA](https://avajs.dev).
<meta charset="utf-8">␊
</head>␊
<body>␊
<script async="" src="https://kikobeats.com/testfile" id="livereloadscript"></script>␊
<a href="mailto:[email protected]">Email</a>␊
<a href="ftp://example.com/file.txt">FTP Link</a>␊
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Base64 Image">␊
</body>␊
</html>`

Expand All @@ -35,7 +39,7 @@ Generated by [AVA](https://avajs.dev).
<!-- Search Engine -->␊
<meta name="description" content="a puppeter-like Node.js library for interacting with Headless production scenarios.">␊
<meta name="image" content="https://browserless.js.org/static/logo-banner.png">␊
<link rel="canonical" href="https://browserless.js.org">␊
<link rel="canonical" href="https://browserless.js.org/">␊
<title>browserless, a puppeter-like Node.js library for interacting with Headless production scenarios.</title>␊
<meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">␊
Expand Down
Binary file modified test/html/snapshots/rewrite-urls.js.snap
Binary file not shown.

0 comments on commit e261c9c

Please sign in to comment.