Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make sure JSON output strings are sanitized #114

Merged
merged 4 commits into from
May 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions lib/attributes_json.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# the_foo.number_of_bars = 42
# the_foo.as_json #=> {:number_of_bars => 42}
module FormatParser::AttributesJSON
UNICODE_REPLACEMENT_CHAR = [0xFFFD].pack('U')
MAXIMUM_JSON_NESTING_WHEN_SANITIZING = 256

# Implements a sane default `as_json` for an object
# that accessors defined
Expand All @@ -19,11 +21,12 @@ def as_json(root: false)
h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
reader_method_name = attr_writer_method_name.to_s.gsub(/\=$/, '')
value = public_send(reader_method_name)
value = nil if value == Float::INFINITY
# When calling as_json on our members there is no need to pass the root: option given to us
# by the caller
h[reader_method_name] = value.respond_to?(:as_json) ? value.as_json : value
attribute_value = public_send(reader_method_name)
# When calling as_json on our members there is no need to pass
# the root: option given to us by the caller
unwrapped_attribute_value = attribute_value.respond_to?(:as_json) ? attribute_value.as_json : attribute_value
sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
h[reader_method_name] = sanitized_value
end
if root
{'format_parser_file_info' => h}
Expand All @@ -32,6 +35,32 @@ def as_json(root: false)
end
end

# Used for sanitizing values that are sourced to `JSON::Generator::State#generate`
# The reason we need to do this is as follows: `JSON.generate / JSON.dump / JSON.pretty_generate`
# use a totally different code path than `"foo".to_json(generator_state)`. We cannot predict
# which one of these two ways our users will be using, and at the same time we need to prevent
# invalid Strings (ones which cannot be encoded into UTF-8) as well as Float::INFINITY values
# from being passed to the JSON encoder. Since we cannot override the JSON generator with
# these additions, instead we will deep-convert the entire object being output to make sure
# it is up to snuff.
def _sanitize_json_value(value, nesting = 0)
raise ArgumentError, 'Nested JSON-ish structure too deep' if nesting > MAXIMUM_JSON_NESTING_WHEN_SANITIZING
case value
when Float::INFINITY
nil
when String
value.encode(Encoding::UTF_8, undef: :replace, replace: UNICODE_REPLACEMENT_CHAR)
when Hash
Hash[value.map { |k, v| [_sanitize_json_value(k, nesting + 1), _sanitize_json_value(v, nesting + 1)] }]
when Array
value.map { |v| _sanitize_json_value(v, nesting + 1) }
when Struct
_sanitize_json_value(value.to_h, nesting + 1)
else
value
end
end

# Implements to_json with sane defaults, with or without arguments
def to_json(*maybe_generator_state)
as_json(root: false).to_json(*maybe_generator_state)
Expand Down
43 changes: 43 additions & 0 deletions spec/attributes_json_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,47 @@ def nature

expect(readback).to have_key(:nature)
end

it 'converts purely-binary String objects deeply nested in the struct to escapes and question marks' do
nasty_hash = {
id: 'TIT2',
size: 37,
flags: "\x00\x00",
struct: Struct.new(:key).new('Value'),
content: "\x01\xFF\xFEb\x00i\x00r\x00d\x00s\x00 \x005\x00 \x00m\x00o\x00r\x00e\x00 \x00c\x00o\x00m\x00p\x00".b
}
expect {
JSON.pretty_generate(nasty_hash) # Should not raise an error
}.to raise_error(Encoding::UndefinedConversionError)

anon_class = Struct.new(:evil)
anon_class.include FormatParser::AttributesJSON

object_with_attributes_module = anon_class.new(nasty_hash)
output = JSON.pretty_generate(object_with_attributes_module)

parsed_output = JSON.parse(output, symbolize_names: true)

expect(parsed_output[:evil][:struct]).to eq(key: 'Value')
expect(parsed_output[:evil][:id]).to eq('TIT2')
expect(parsed_output[:evil][:flags]).to be_kind_of(String)
end

it 'prevents traversals of data structures which are too deep with an exception' do
fractal_hash = {}
current = fractal_hash
1024.times do
current[:leaf] = {}
current = current[:leaf]
end

anon_class = Struct.new(:evil)
anon_class.include FormatParser::AttributesJSON

object_with_attributes_module = anon_class.new(fractal_hash)

expect {
JSON.pretty_generate(object_with_attributes_module)
}.to raise_error(/structure too deep/)
end
end
1 change: 0 additions & 1 deletion spec/parsers/zip_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@
expect(json_parsed_repr[:format]).to eq('zip')
expect(json_parsed_repr[:entries]).to be_kind_of(Array)
expect(json_parsed_repr[:entries].length).to eq(3)

json_parsed_repr[:entries].each do |e|
expect(e[:filename]).to be_kind_of(String)
expect(e[:size]).to be_kind_of(Integer)
Expand Down