From d5992f94e39f29d71918ba58f8ee5505524bfbf4 Mon Sep 17 00:00:00 2001
From: tompng
Date: Tue, 16 Jan 2024 02:44:53 +0900
Subject: [PATCH] Fix coderange of invalid_encoding_string.<<(ord)

Appending valid encoding character can change coderange from invalid to valid.
Example: "\x95".force_encoding('sjis')<<0x5C will be a valid string "\x{955C}"
---
 string.c                 | 6 +++++-
 test/ruby/test_string.rb | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/string.c b/string.c
index bb830fc06c7bbc..5eda835f4b36a3 100644
--- a/string.c
+++ b/string.c
@@ -3522,8 +3522,12 @@ rb_str_concat(VALUE str1, VALUE str2)
         }
         rb_str_resize(str1, pos+len);
         memcpy(RSTRING_PTR(str1) + pos, buf, len);
-        if (cr == ENC_CODERANGE_7BIT && code > 127)
+        if (cr == ENC_CODERANGE_7BIT && code > 127) {
             cr = ENC_CODERANGE_VALID;
+        }
+        else if (cr == ENC_CODERANGE_BROKEN) {
+            cr = ENC_CODERANGE_UNKNOWN;
+        }
         ENC_CODERANGE_SET(str1, cr);
     }
     return str1;
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 4aa119f8fd139e..42f2544b5a8692 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -301,6 +301,9 @@ def test_LSHIFT # '<<'
     assert_raise(RangeError, bug) {S("a".force_encoding(Encoding::UTF_8)) << -1}
     assert_raise(RangeError, bug) {S("a".force_encoding(Encoding::UTF_8)) << 0x81308130}
     assert_nothing_raised {S("a".force_encoding(Encoding::GB18030)) << 0x81308130}
+
+    s = "\x95".force_encoding(Encoding::SJIS).tap(&:valid_encoding?)
+    assert_predicate(s << 0x5c, :valid_encoding?)
   end
 
   def test_MATCH # '=~'