Skip to content

Commit

Permalink
[Base64] Make padding optional when decoding (JuliaLang#44503)
Browse files Browse the repository at this point in the history
The pad character `=` is required when encoding base64, but not when
decoding: padding characters are not needed to decode the data correctly. See
https://en.wikipedia.org/wiki/Base64#Output_padding

This PR makes it possible to decode base64-encoded strings/streams
without padding, matching V8's behavior when decoding data URLs.

(The [official spec](https://datatracker.ietf.org/doc/html/rfc4648#section-4)
for Base64 states that padding is required for base64-encoding, but it
does not specify a requirement for decoding.)
  • Loading branch information
fonsp authored Mar 8, 2022
1 parent 02abca3 commit 88062ea
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
7 changes: 3 additions & 4 deletions stdlib/Base64/src/decode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,20 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest)
b4 = decode(read(input, UInt8))
else
b4 = BASE64_CODE_END
break
end
end

# Check the decoded quadruplet.
k = 0
if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40
k = 3
elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD
elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && (b4 == BASE64_CODE_PAD || b4 == BASE64_CODE_END)
b4 = 0x00
k = 2
elseif b1 < 0x40 && b2 < 0x40 && b3 == b4 == BASE64_CODE_PAD
elseif b1 < 0x40 && b2 < 0x40 && (b3 == BASE64_CODE_PAD || b3 == BASE64_CODE_END) && (b4 == BASE64_CODE_PAD || b4 == BASE64_CODE_END)
b3 = b4 = 0x00
k = 1
elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END
elseif b1 == b2 == b3 == b4 == BASE64_CODE_END
b1 = b2 = b3 = b4 = 0x00
else
throw(ArgumentError("malformed base64 sequence"))
Expand Down
15 changes: 15 additions & 0 deletions stdlib/Base64/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,21 @@ const longDecodedText = "name = \"Genie\"\nuuid = \"c43c736e-a2d1-11e8-161f-af95

# issue #32397
@test String(base64decode(longEncodedText)) == longDecodedText;

# Optional padding
@test base64decode("AQ==") == base64decode("AQ")
@test base64decode("zzzzAQ==") == base64decode("zzzzAQ")
@test base64decode("AQI=") == base64decode("AQI")

# Too short: a single leftover base64 character carries only 6 bits, which does not cover a full byte.
@test_throws ArgumentError base64decode("a")
@test_throws ArgumentError base64decode("a===")
@test_throws ArgumentError base64decode("ZZZZa")
@test_throws ArgumentError base64decode("ZZZZa===")

# Bit padding should be ignored, which means that `jl` and `jk` should give the same result.
@test base64decode("jl") == base64decode("jk") == base64decode("jk==") == [142]
@test base64decode("Aa") == base64decode("AS") == base64decode("AS==") == [1]
end

@testset "Random data" begin
Expand Down

0 comments on commit 88062ea

Please sign in to comment.