Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for zone identifiers #26

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
authors = ["Scott Godwin <[email protected]>"]
categories = ["parsing"]
description = "A URI parser including relative references"
edition = "2018"
edition = "2021"
homepage = "https://github.com/sgodwincs/uriparse-rs"
license = "MIT"
name = "uriparse"
Expand Down
25 changes: 25 additions & 0 deletions examples/cli.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use std::env;

/// Command-line demo: parses each argument as a URI reference and prints the outcome.
///
/// For every argument the parse result is printed with pretty `Debug` formatting. When parsing
/// succeeds, the value is serialized again and a warning is printed if the serialization differs
/// from the original input. When no arguments are given, a usage hint goes to standard error.
fn main() {
    let mut args = env::args();
    // The program name is only conventionally the first argument; the platform may omit it, so
    // fall back to a placeholder instead of panicking.
    let argv0 = args.next().unwrap_or_else(|| String::from("cli"));

    // `Args` is an `ExactSizeIterator`, so `len()` is the exact number of remaining arguments
    // (unlike `size_hint`, which is only advisory).
    if args.len() == 0 {
        eprintln!("No URIs were given on the command line.");
        eprintln!(
            "Try running this as `{} http://example.com:1234/hello ../../path`",
            argv0
        );
    }

    for uri in args {
        let parsed = uriparse::URIReference::try_from(uri.as_str());
        println!("<{}>: {:#?}", uri, parsed);

        if let Ok(parsed) = parsed {
            // Serialize back and compare with the input to detect lossy round-trips.
            let reconstructed = parsed.to_string();
            if reconstructed != uri {
                println!("Warning: URI doesn't round-trip -- serializes into:");
                println!("<{}>", reconstructed);
            }
        }
    }
}
105 changes: 94 additions & 11 deletions src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ impl<'authority> Authority<'authority> {
let host = match &self.host {
Host::RegisteredName(name) => Host::RegisteredName(name.as_borrowed()),
Host::IPv4Address(ipv4) => Host::IPv4Address(*ipv4),
Host::IPv6Address(ipv6) => Host::IPv6Address(*ipv6),
Host::IPv6Address(ipv6, zone) => Host::IPv6Address(*ipv6, zone.as_ref().map(|z| z.as_borrowed())),
};
let password = self.password.as_ref().map(Password::as_borrowed);
let username = self.username.as_ref().map(Username::as_borrowed);
Expand Down Expand Up @@ -299,7 +299,7 @@ impl<'authority> Authority<'authority> {
let host = match self.host {
Host::RegisteredName(name) => Host::RegisteredName(name.into_owned()),
Host::IPv4Address(ipv4) => Host::IPv4Address(ipv4),
Host::IPv6Address(ipv6) => Host::IPv6Address(ipv6),
Host::IPv6Address(ipv6, zone) => Host::IPv6Address(ipv6, zone.map(|z| z.into_owned())),
};

Authority {
Expand Down Expand Up @@ -553,7 +553,7 @@ impl<'authority> Authority<'authority> {
/// let mut authority = Authority::try_from("example.com:8080").unwrap();
/// authority.set_host("127.0.0.1");
/// assert_eq!(authority.to_string(), "127.0.0.1:8080");
/// authority.set_host(Host::IPv6Address("::1".parse().unwrap()));
/// authority.set_host(Host::IPv6Address("::1".parse().unwrap(), None));
/// assert_eq!(authority.to_string(), "[::1]:8080");
/// ```
pub fn set_host<THost, THostError>(
Expand Down Expand Up @@ -773,7 +773,7 @@ pub enum Host<'host> {
IPv4Address(Ipv4Addr),

/// An IPv6 address. This will always be encased in brackets (`'['` and `']'`).
IPv6Address(Ipv6Addr),
IPv6Address(Ipv6Addr, Option<ZoneID<'host>>),

/// Any other host that does not follow the syntax of an IP address. This includes even hosts of
/// the form `"999.999.999.999"`. One might expect this to produce an invalid IPv4 error, but
Expand All @@ -791,7 +791,7 @@ impl Host<'_> {

match self {
IPv4Address(ipv4) => IPv4Address(*ipv4),
IPv6Address(ipv6) => IPv6Address(*ipv6),
IPv6Address(ipv6, zone) => IPv6Address(*ipv6, zone.as_ref().map(|z| z.as_borrowed())),
RegisteredName(name) => RegisteredName(name.as_borrowed()),
}
}
Expand All @@ -809,7 +809,7 @@ impl Host<'_> {

match self {
IPv4Address(ipv4) => IPv4Address(ipv4),
IPv6Address(ipv6) => IPv6Address(ipv6),
IPv6Address(ipv6, zone) => IPv6Address(ipv6, zone.map(|z| z.into_owned())),
RegisteredName(name) => RegisteredName(name.into_owned()),
}
}
Expand Down Expand Up @@ -847,7 +847,7 @@ impl Host<'_> {
/// ```
pub fn is_ipv6_address(&self) -> bool {
match self {
Host::IPv6Address(_) => true,
Host::IPv6Address(_, _) => true,
_ => false,
}
}
Expand Down Expand Up @@ -940,9 +940,13 @@ impl Display for Host<'_> {

match self {
IPv4Address(address) => address.fmt(formatter),
IPv6Address(address) => {
IPv6Address(address, zone) => {
formatter.write_char('[')?;
address.fmt(formatter)?;
if let Some(zone) = zone {
formatter.write_char('%')?;
formatter.write_str(zone.as_str())?;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you implement Display for zone ID and do zone.fmt(formatter)? instead

}
formatter.write_char(']')
}
RegisteredName(name) => formatter.write_str(name.as_str()),
Expand All @@ -960,7 +964,7 @@ impl From<IpAddr> for Host<'static> {
fn from(value: IpAddr) -> Self {
match value {
IpAddr::V4(address) => Host::IPv4Address(address),
IpAddr::V6(address) => Host::IPv6Address(address),
IpAddr::V6(address) => address.into(),
}
}
}
Expand All @@ -973,7 +977,7 @@ impl From<Ipv4Addr> for Host<'static> {

impl From<Ipv6Addr> for Host<'static> {
fn from(value: Ipv6Addr) -> Self {
Host::IPv6Address(value)
Host::IPv6Address(value, None)
}
}

Expand Down Expand Up @@ -1011,6 +1015,10 @@ impl<'host> TryFrom<&'host [u8]> for Host<'host> {
// IPv6

let ipv6 = &value[1..value.len() - 1];
let mut split_for_zone = ipv6.splitn(2, |c| c == &b'%');
let ipv6 = split_for_zone.next()
.expect("Even an empty string splits into at least one component");
let zone = split_for_zone.next();

if !check_ipv6(ipv6) {
return Err(HostError::InvalidIPv6Character);
Expand All @@ -1020,7 +1028,7 @@ impl<'host> TryFrom<&'host [u8]> for Host<'host> {
let ipv6: Ipv6Addr = unsafe { str::from_utf8_unchecked(ipv6) }
.parse()
.map_err(|_| HostError::InvalidIPv6Format)?;
Ok(Host::IPv6Address(ipv6))
Ok(Host::IPv6Address(ipv6, ZoneID::new(zone)?))
}
_ => {
let (valid, normalized) = check_ipv4_or_registered_name(value);
Expand Down Expand Up @@ -1300,6 +1308,65 @@ impl<'password> TryFrom<&'password str> for Password<'password> {
}
}

/// A zone identifier (as part of an IPv6 address)
///
/// This has no normalization rules, and a validity constraint of containing one or more unreserved
/// characters.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ZoneID<'zone> {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit tedious, but can you impl all the traits other types do in this library? For example, just go through fragment and implement stuff like AsRef, Deref, PartialEq with strings, etc.

value: Cow<'zone, str>,
}

impl ZoneID<'_> {
fn new<'a>(value: Option<&'a [u8]>) -> Result<Option<ZoneID<'a>>, HostError> {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following other types in this library, can you move this to a TryFrom<&[u8]> implementation

if let Some(value) = value {
if value.is_empty() || !check_zone(value) {
return Err(HostError::InvalidZoneCharacter);
}

// Unsafe: character set was just checked way stricter than a UTF-8 check
let value = unsafe { str::from_utf8_unchecked(value) };
Ok(Some(ZoneID { value: Cow::from(value) }))
} else {
Ok(None)
}
}

/// Returns a new zone ID with the same contents whose lifetime is tied to this zone ID.
///
/// The returned value always holds a borrowed string slice, regardless of whether this zone ID
/// stores its text as borrowed or owned data.
pub fn as_borrowed(&self) -> ZoneID {
    // Dereferencing the `Cow` yields the underlying `str` for both variants.
    let text: &str = &self.value;

    ZoneID {
        value: Cow::Borrowed(text),
    }
}

/// Returns the zone ID as a string slice.
pub fn as_str(&self) -> &str {
    self.value.as_ref()
}

/// Converts the [`ZoneID`] into an owned copy.
///
/// If you construct the zone ID from a source with a non-static lifetime, you may run into
/// lifetime problems due to the way the struct is designed. Calling this function will ensure
/// that the returned value has a static lifetime.
///
/// This is different from just cloning. Cloning the zone ID will just copy the references, and
/// thus the lifetime will remain the same.
pub fn into_owned(self) -> ZoneID<'static> {
    let ZoneID { value } = self;
    let owned: String = value.into_owned();

    ZoneID {
        value: Cow::Owned(owned),
    }
}
}

/// A host that is a registered name (i.e. not an IP literal).
///
/// The registered name is case-insensitive meaning that `"example.com"` and `"ExAmPlE.CoM"` refer
Expand Down Expand Up @@ -1879,6 +1946,10 @@ pub enum HostError {
/// character.
InvalidIPv6Character,

/// The zone identifier (inside an IPv6 address after the `%`) was empty or contained characters
/// outside the allowed set (the "unreserved" characters).
InvalidZoneCharacter,

/// The syntax for an IPv6 literal was used (i.e. `"[...]"`) and all of the characters were
/// valid IPv6 characters. However, the format of the literal was invalid.
InvalidIPv6Format,
Expand All @@ -1901,6 +1972,7 @@ impl Display for HostError {
}
InvalidIPv6Character => write!(formatter, "invalid host IPv6 character"),
InvalidIPv6Format => write!(formatter, "invalid host IPv6 format"),
InvalidZoneCharacter => write!(formatter, "invalid character in zone ID"),
InvalidIPvFutureCharacter => write!(formatter, "invalid host IPvFuture character"),
}
}
Expand Down Expand Up @@ -2122,6 +2194,17 @@ fn check_ipv6(value: &[u8]) -> bool {
true
}

/// Returns true if the byte string contains only valid ZoneID characters.
///
/// A byte is accepted when its entry in `UNRESERVED_CHAR_MAP` is non-zero. An empty byte string
/// yields `true`, since no byte is rejected.
fn check_zone(value: &[u8]) -> bool {
    value
        .iter()
        .all(|&byte| UNRESERVED_CHAR_MAP[byte as usize] != 0)
}

/// Returns true if the byte string contains only valid future IP literal characters. This also
/// ensures that percent encodings are valid.
fn check_ipvfuture(value: &[u8]) -> bool {
Expand Down