From d70a9392affb9ace3c3c32af9acc8129ab6976b7 Mon Sep 17 00:00:00 2001 From: ChangYanChu Date: Sat, 8 Feb 2025 20:03:44 +0800 Subject: [PATCH] improve UTF-8 string handling in JSONValue (#6011) ```text feat(json): improve UTF-8 string handling in JSONValue - Add proper UTF-8 multi-byte character sequence handling - Add boundary checks for UTF-8 sequences - Keep original code structure and flow - Add detailed comments for UTF-8 processing logic This change improves the robustness of JSON string handling while maintaining compatibility with existing code. ``` --- src/serialization/JSONValue.cpp | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/serialization/JSONValue.cpp b/src/serialization/JSONValue.cpp index 64dc10abe8..20cd903736 100644 --- a/src/serialization/JSONValue.cpp +++ b/src/serialization/JSONValue.cpp @@ -850,18 +850,26 @@ std::string JSONValue::StringifyString(const std::string &str) str_out += "\\r"; } else if (chr == '\t') { str_out += "\\t"; - } else if (chr < ' ' || chr > 126) { - str_out += "\\u"; - for (int i = 0; i < 4; i++) { - int value = (chr >> 12) & 0xf; - if (value >= 0 && value <= 9) - str_out += (char)('0' + value); - else if (value >= 10 && value <= 15) - str_out += (char)('A' + (value - 10)); - chr <<= 4; - } + } else if (chr < 0x20 || chr == 0x7F) { + char buf[7]; + snprintf(buf, sizeof(buf), "\\u%04x", chr); + str_out += buf; + } else if (chr < 0x80) { + str_out += chr; } else { str_out += chr; + size_t remain = str.end() - iter - 1; + if ((chr & 0xE0) == 0xC0 && remain >= 1) { + ++iter; + str_out += *iter; + } else if ((chr & 0xF0) == 0xE0 && remain >= 2) { + str_out += *(++iter); + str_out += *(++iter); + } else if ((chr & 0xF8) == 0xF0 && remain >= 3) { + str_out += *(++iter); + str_out += *(++iter); + str_out += *(++iter); + } } ++iter;