diff --git a/libsupport/src/URI.cpp b/libsupport/src/URI.cpp index 97eddf85ec..1d9e4368ac 100644 --- a/libsupport/src/URI.cpp +++ b/libsupport/src/URI.cpp @@ -117,6 +117,9 @@ DoJoinPath(std::string_view dir, std::string_view file) { return fmt::format("{}{}", dir, file); } +// As defined by https://www.rfc-editor.org/rfc/rfc3986, +// we encode characters outside this list +// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=" bool ShouldURLEncode(int c) { if ('a' <= c && c <= 'z') { @@ -128,26 +131,32 @@ ShouldURLEncode(int c) { if ('0' <= c && c <= '9') { return false; } - if (c == '-') { - return false; - } - if (c == '.') { - return false; - } - if (c == '_') { - return false; - } - if (c == '~') { - return false; - } - - // We encode whole paths, so in addition to the standard unencoded characters - // above, we should not encode '/' either. - if (c == '/') { + switch (c) { + case '-': + case '.': + case '_': + case '~': + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': return false; + default: + return true; } - - return true; } // ToHex converts a char between 0 and 15 to an ASCII character from 0 to F. diff --git a/libsupport/test/uri.cpp b/libsupport/test/uri.cpp index 8a025abe37..3eb4e405d9 100644 --- a/libsupport/test/uri.cpp +++ b/libsupport/test/uri.cpp @@ -20,6 +20,8 @@ TestMake() { // this KATANA_LOG_ASSERT(Str2Uri("s3:///some/path//").path() == "/some/path/"); KATANA_LOG_ASSERT(Str2Uri("s3://some/path").path() == "some/path"); + KATANA_LOG_ASSERT( + Str2Uri("hdfs://somehost:8020/path").path() == "somehost:8020/path"); KATANA_LOG_ASSERT(Str2Uri("path").BaseName() == "path"); KATANA_LOG_ASSERT(Str2Uri("path///////").StripSep().path() == "path"); @@ -41,6 +43,9 @@ TestJoinPath() { katana::Uri::JoinPath("/some/long///", "/path") == "/some/long/path"); KATANA_LOG_ASSERT( katana::Uri::JoinPath("/some/long///", "//path") == "/some/long/path"); + KATANA_LOG_ASSERT( + katana::Uri::JoinPath("/host:8020/long///", "//path") == + "/host:8020/long/path"); } void @@ -66,6 +71,8 @@ TestDecode() { KATANA_LOG_ASSERT( katana::Uri::Decode("/%20with/%20spaces") == "/ with/ spaces"); + KATANA_LOG_ASSERT( + katana::Uri::Decode("host%3A8020/path") == "host:8020/path"); } } // namespace