Skip to content

Commit

Permalink
Exclude example TLDs from RFC 2606 (#1335)
Browse files Browse the repository at this point in the history
Fixes #1283
  • Loading branch information
mre authored Jan 5, 2024
1 parent 861a718 commit 63ba63f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 3 deletions.
6 changes: 6 additions & 0 deletions fixtures/TEST_EXAMPLE_DOMAINS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ mailto:[email protected]?subject=hello
http://example.net/foo/bar
[email protected]
[email protected]

https://test.localhost
http://foo.bar.invalid
foo.bar.invalid/some/path
https://example.example
http://integration.test
8 changes: 7 additions & 1 deletion fixtures/TEST_EXAMPLE_DOMAINS_FALSE_POSITIVES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
http://gobyexample.com/
https://examples.com/
https://texample.net/
https://texample.net/

http://foo.isnotinvalid
http://foo.bar.invalid2
http://integration.text
https://test.possiblylocalhost
https://example.examplenotexample
2 changes: 1 addition & 1 deletion lychee-bin/tests/example_domains.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ mod cli {

let output = cmd.get_output();
let output = std::str::from_utf8(&output.stdout).unwrap();
assert_eq!(output.lines().count(), 3);
assert_eq!(output.lines().count(), 8);

Ok(())
}
Expand Down
13 changes: 12 additions & 1 deletion lychee-lib/src/filter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,19 @@ use crate::Uri;
static EXAMPLE_DOMAINS: Lazy<HashSet<&'static str>> =
Lazy::new(|| HashSet::from_iter(["example.com", "example.org", "example.net", "example.edu"]));

/// Reserved top-level domains from section 2 of RFC 2606, which are excluded
/// in addition to the example domains.
///
/// Every entry keeps its leading dot because it is compared against the end
/// of a domain. Test builds and builds with the `check_example_domains`
/// feature replace this with an empty set.
#[cfg(all(not(test), not(feature = "check_example_domains")))]
static EXAMPLE_TLDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    [".test", ".example", ".invalid", ".localhost"]
        .iter()
        .copied()
        .collect()
});

// Tests and builds with the `check_example_domains` feature must be able to
// use example domains, so the exclusion set stays empty for them.
#[cfg(any(test, feature = "check_example_domains"))]
static EXAMPLE_DOMAINS: Lazy<HashSet<&'static str>> = Lazy::new(HashSet::default);

// Same as for the example domains: no example TLD is excluded in tests or
// when the `check_example_domains` feature is enabled.
#[cfg(any(test, feature = "check_example_domains"))]
static EXAMPLE_TLDS: Lazy<HashSet<&'static str>> = Lazy::new(HashSet::default);

static UNSUPPORTED_DOMAINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
HashSet::from_iter([
// Twitter requires an account to view tweets
Expand Down Expand Up @@ -66,7 +75,9 @@ pub fn is_example_domain(uri: &Uri) -> bool {
|| domain
.split_once('.')
.map_or(false, |(_subdomain, tld_part)| tld_part == example)
})
}) || EXAMPLE_TLDS
.iter()
.any(|&example_tld| domain.ends_with(example_tld))
}
None => {
// Check if the URI is an email address.
Expand Down

0 comments on commit 63ba63f

Please sign in to comment.