Skip to content

Commit

Permalink
Expand the list of non-encoded URI characters (#917)
Browse files Browse the repository at this point in the history
This PR expands the list of characters that are left unencoded, following RFC 3986 (https://www.rfc-editor.org/rfc/rfc3986).
  • Loading branch information
wkyu2kg authored Feb 22, 2022
1 parent 4274af0 commit 64d1c08
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 18 deletions.
45 changes: 27 additions & 18 deletions libsupport/src/URI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ DoJoinPath(std::string_view dir, std::string_view file) {
return fmt::format("{}{}", dir, file);
}

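// Following RFC 3986 (https://www.rfc-editor.org/rfc/rfc3986), characters in
// the list below are left unencoded; every character outside it is encoded.
// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="
// NOTE(review): '=' appears in this list but not among the visible switch
// cases below -- confirm whether a `case '=':` is missing.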
bool
ShouldURLEncode(int c) {
if ('a' <= c && c <= 'z') {
Expand All @@ -128,26 +131,32 @@ ShouldURLEncode(int c) {
if ('0' <= c && c <= '9') {
return false;
}
if (c == '-') {
return false;
}
if (c == '.') {
return false;
}
if (c == '_') {
return false;
}
if (c == '~') {
return false;
}

// We encode whole paths, so in addition to the standard unencoded characters
// above, we should not encode '/' either.
if (c == '/') {
switch (c) {
case '-':
case '.':
case '_':
case '~':
case ':':
case '/':
case '?':
case '#':
case '[':
case ']':
case '@':
case '!':
case '$':
case '&':
case '\'':
case '(':
case ')':
case '*':
case '+':
case ',':
case ';':
return false;
default:
return true;
}

return true;
}

// ToHex converts a char between 0 and 15 to an ASCII character from 0 to F.
Expand Down
7 changes: 7 additions & 0 deletions libsupport/test/uri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ TestMake() {
// this
KATANA_LOG_ASSERT(Str2Uri("s3:///some/path//").path() == "/some/path/");
KATANA_LOG_ASSERT(Str2Uri("s3://some/path").path() == "some/path");
KATANA_LOG_ASSERT(
Str2Uri("hdfs://somehost:8020/path").path() == "somehost:8020/path");

KATANA_LOG_ASSERT(Str2Uri("path").BaseName() == "path");
KATANA_LOG_ASSERT(Str2Uri("path///////").StripSep().path() == "path");
Expand All @@ -41,6 +43,9 @@ TestJoinPath() {
katana::Uri::JoinPath("/some/long///", "/path") == "/some/long/path");
KATANA_LOG_ASSERT(
katana::Uri::JoinPath("/some/long///", "//path") == "/some/long/path");
KATANA_LOG_ASSERT(
katana::Uri::JoinPath("/host:8020/long///", "//path") ==
"/host:8020/long/path");
}

void
Expand All @@ -66,6 +71,8 @@ TestDecode() {

KATANA_LOG_ASSERT(
katana::Uri::Decode("/%20with/%20spaces") == "/ with/ spaces");
KATANA_LOG_ASSERT(
katana::Uri::Decode("host%3A8020/path") == "host:8020/path");
}

} // namespace
Expand Down

0 comments on commit 64d1c08

Please sign in to comment.