From 25406de323d753b0844f63873a570bea0a44cb13 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 13 Jan 2025 19:49:43 +0800 Subject: [PATCH] fmt Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnArray.cpp | 10 +-- dbms/src/Columns/ColumnNullable.cpp | 6 +- dbms/src/Columns/ColumnString.cpp | 44 +++++++--- .../gtest_column_serialize_deserialize.cpp | 8 +- dbms/src/TiDB/Collation/Collator.cpp | 83 ++++++++++++------- dbms/src/TiDB/Collation/Collator.h | 20 ++--- 6 files changed, 98 insertions(+), 73 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 98dd98022b4..815fcd74231 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -304,14 +304,8 @@ void ColumnArray::serializeToPosImpl( } if constexpr (for_compare) - getData().serializeToPosForCmpColumnArray( - pos, - start, - length, - has_null, - getOffsets(), - collator, - sort_key_container); + getData() + .serializeToPosForCmpColumnArray(pos, start, length, has_null, getOffsets(), collator, sort_key_container); else getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets()); } diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 41316c1e843..39fbe8f6f5e 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -373,10 +373,8 @@ void ColumnNullable::deserializeForCmpAndInsertFromPosColumnArray( bool use_nt_align_buffer, const TiDB::TiDBCollatorPtr & collator) { - getNullMapColumn() - .deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator); - getNestedColumn() - .deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator); + getNullMapColumn().deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator); + getNestedColumn().deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator); } void ColumnNullable::deserializeAndInsertFromPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 2212596c11f..926c916ff61 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -16,8 +16,8 @@ #include #include #include -#include #include +#include #include #include @@ -560,13 +560,22 @@ void ColumnString::countSerializeByteSizeForCmpColumnArray( if likely (collator != nullptr) { if (collator->maxBytesForOneChar() > 1) - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, collator); + countSerializeByteSizeForColumnArrayImpl( + byte_size, + array_offsets, + collator); else - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, collator); + countSerializeByteSizeForColumnArrayImpl( + byte_size, + array_offsets, + collator); } else { - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullptr); + countSerializeByteSizeForColumnArrayImpl( + byte_size, + array_offsets, + nullptr); } } @@ -574,7 +583,10 @@ void ColumnString::countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const { - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullptr); + countSerializeByteSizeForColumnArrayImpl( + byte_size, + array_offsets, + nullptr); } template @@ -699,12 +711,12 @@ void ColumnString::serializeToPosImplType( { RUNTIME_CHECK(collator && sort_key_container); -#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) \ - case (COLLATOR_ID): \ - { \ - serializeToPosImpl(pos, start, length, collator, sort_key_container); \ - break; \ - } +#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) \ + case (COLLATOR_ID): \ + { \ + serializeToPosImpl(pos, start, length, collator, sort_key_container); \ + break; \ + } switch (collator->getCollatorId()) { @@ -719,7 +731,12 @@ void ColumnString::serializeToPosImplType( } else { - serializeToPosImpl(pos, start, length, collator, sort_key_container); + serializeToPosImpl( + pos, + start, + length, + collator, + sort_key_container); } } @@ -749,7 +766,8 @@ void ColumnString::serializeToPosImpl( const void * src = &chars[offsetAt(start + i)]; if constexpr (has_collator) { - auto sort_key = derived_collator->sortKey(reinterpret_cast(src), str_size - 1, *sort_key_container); + auto sort_key + = derived_collator->sortKey(reinterpret_cast(src), str_size - 1, *sort_key_container); str_size = sort_key.size; src = sort_key.data; } diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index e8e0279a492..c18d7780eee 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -728,13 +728,7 @@ try auto col_string = createColumn({"hangzhou", "杭州", "你好世界", "欧元€", "abc里拉₤", "12法郎₣", ""}).column; testCountSerializeByteSize( col_string, - {4 + 8, - 4 + 6, - 4 + 12, - 4 + 9, - 4 + 12, - 4 + 11, - 4 + 0}, + {4 + 8, 4 + 6, 4 + 12, 4 + 9, 4 + 12, 4 + 11, 4 + 0}, true, collator_utf8_bin); testCountSerializeByteSize( diff --git a/dbms/src/TiDB/Collation/Collator.cpp b/dbms/src/TiDB/Collation/Collator.cpp index 1ee04472010..1e7ebb9ec03 100644 --- a/dbms/src/TiDB/Collation/Collator.cpp +++ b/dbms/src/TiDB/Collation/Collator.cpp @@ -124,28 +124,28 @@ bool Pattern::match(const char * s, size_t length) const { switch (match_types[p_idx]) { - case Match: - if (s_offset < length - && Collator::regexEq(Collator::decodeChar(s, tmp_s_offset = s_offset), chars[p_idx])) - { - p_idx++; - s_offset = tmp_s_offset; - continue; - } - break; - case One: - if (s_offset < length) - { - p_idx++; - Collator::decodeChar(s, s_offset); - continue; - } - break; - case Any: - next_p_idx = p_idx; - Collator::decodeChar(s, next_s_offset = s_offset); + case Match: + if (s_offset < length + && Collator::regexEq(Collator::decodeChar(s, tmp_s_offset = s_offset), chars[p_idx])) + { p_idx++; + s_offset = tmp_s_offset; continue; + } + break; + case One: + if (s_offset < length) + { + p_idx++; + Collator::decodeChar(s, s_offset); + continue; + } + break; + case Any: + next_p_idx = p_idx; + Collator::decodeChar(s, next_s_offset = s_offset); + p_idx++; + continue; } } if (0 < next_s_offset && next_s_offset <= length) @@ -161,11 +161,12 @@ bool Pattern::match(const char * s, size_t length) const template inline std::unique_ptr BinCollator::pattern() const -{ return std::make_unique>>(); } +{ + return std::make_unique>>(); +} template -inline typename BinCollator::CharType -BinCollator::decodeChar(const char * s, size_t & offset) +inline typename BinCollator::CharType BinCollator::decodeChar(const char * s, size_t & offset) { if constexpr (std::is_same_v) { @@ -198,7 +199,11 @@ int GeneralCICollator::compare(const char * s1, size_t length1, const char * s2, } template -StringRef GeneralCICollator::convertImpl(const char * s, size_t length, std::string & container, std::vector * lens) const +StringRef GeneralCICollator::convertImpl( + const char * s, + size_t length, + std::string & container, + std::vector * lens) const { std::string_view v; @@ -235,7 +240,10 @@ StringRef GeneralCICollator::convertImpl(const char * s, size_t length, std::str return StringRef(container.data(), total_size); } -inline GeneralCICollator::CharType GeneralCICollator::decodeChar(const char * s, size_t & offset) { return decodeUtf8Char(s, offset); } +inline GeneralCICollator::CharType GeneralCICollator::decodeChar(const char * s, size_t & offset) +{ + return decodeUtf8Char(s, offset); +} namespace UnicodeCI { @@ -357,7 +365,11 @@ int UCACICollator::compare(const char * s1, size_t length1, const ch template template -StringRef UCACICollator::convertImpl(const char * s, size_t length, std::string & container, std::vector * lens) const +StringRef UCACICollator::convertImpl( + const char * s, + size_t length, + std::string & container, + std::vector * lens) const { std::string_view v; @@ -403,10 +415,20 @@ StringRef UCACICollator::convertImpl(const char * s, size_t length, } template -inline typename UCACICollator::CharType UCACICollator::decodeChar(const char * s, size_t & offset) { return decodeUtf8Char(s, offset); } +inline typename UCACICollator::CharType UCACICollator::decodeChar( + const char * s, + size_t & offset) +{ + return decodeUtf8Char(s, offset); +} template -inline void UCACICollator::weight(uint64_t & first, uint64_t & second, size_t & offset, size_t length, const char * s) +inline void UCACICollator::weight( + uint64_t & first, + uint64_t & second, + size_t & offset, + size_t length, + const char * s) { if (first == 0) { @@ -683,8 +705,7 @@ struct TiDBCollatorPtrMap TiDBCollatorPtrMap() { -#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) \ - static const auto VAR_NAME = REAL_TYPE(COLLATOR_ID); +#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) static const auto VAR_NAME = REAL_TYPE(COLLATOR_ID); APPLY_FOR_COLLATOR_TYPES_WITH_VARS(tmp_var, M) #undef M @@ -695,7 +716,7 @@ struct TiDBCollatorPtrMap #define M(name) \ do \ { \ - auto & collator = (tmp_var_##name); \ + auto & collator = (tmp_var_##name); \ id_map[collator.getCollatorId()] = &collator; \ addr_to_type[&collator] = collator.getCollatorType(); \ name_map[#name] = &collator; \ diff --git a/dbms/src/TiDB/Collation/Collator.h b/dbms/src/TiDB/Collation/Collator.h index a599cb5e1a3..0c8adb49382 100644 --- a/dbms/src/TiDB/Collation/Collator.h +++ b/dbms/src/TiDB/Collation/Collator.h @@ -407,17 +407,17 @@ using BIN_COLLATOR_PADDING = BinCollator; using BIN_COLLATOR_NON_PADDING = BinCollator; } // namespace TiDB -#define APPLY_FOR_COLLATOR_TYPES_WITH_VARS(VAR_PREFIX, M) \ - M(VAR_PREFIX##_utf8_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8_GENERAL_CI) \ - M(VAR_PREFIX##_utf8mb4_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI) \ - M(VAR_PREFIX##_utf8_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8_UNICODE_CI) \ - M(VAR_PREFIX##_utf8mb4_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8MB4_UNICODE_CI) \ +#define APPLY_FOR_COLLATOR_TYPES_WITH_VARS(VAR_PREFIX, M) \ + M(VAR_PREFIX##_utf8_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8_GENERAL_CI) \ + M(VAR_PREFIX##_utf8mb4_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI) \ + M(VAR_PREFIX##_utf8_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8_UNICODE_CI) \ + M(VAR_PREFIX##_utf8mb4_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8MB4_UNICODE_CI) \ M(VAR_PREFIX##_utf8mb4_0900_ai_ci, TiDB::UCACI_0900_NON_PADDING, TiDB::ITiDBCollator::UTF8MB4_0900_AI_CI) \ - M(VAR_PREFIX##_utf8mb4_0900_bin, TiDB::UTF8MB4_0900_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_0900_BIN) \ - M(VAR_PREFIX##_utf8mb4_bin, TiDB::UTF8MB4_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_BIN) \ - M(VAR_PREFIX##_latin1_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::LATIN1_BIN) \ - M(VAR_PREFIX##_binary, TiDB::BIN_COLLATOR_NON_PADDING, TiDB::ITiDBCollator::BINARY) \ - M(VAR_PREFIX##_ascii_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::ASCII_BIN) \ + M(VAR_PREFIX##_utf8mb4_0900_bin, TiDB::UTF8MB4_0900_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_0900_BIN) \ + M(VAR_PREFIX##_utf8mb4_bin, TiDB::UTF8MB4_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_BIN) \ + M(VAR_PREFIX##_latin1_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::LATIN1_BIN) \ + M(VAR_PREFIX##_binary, TiDB::BIN_COLLATOR_NON_PADDING, TiDB::ITiDBCollator::BINARY) \ + M(VAR_PREFIX##_ascii_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::ASCII_BIN) \ M(VAR_PREFIX##_utf8_bin, TiDB::UTF8MB4_BIN_TYPE, TiDB::ITiDBCollator::UTF8_BIN) #define APPLY_FOR_COLLATOR_TYPES(M) APPLY_FOR_COLLATOR_TYPES_WITH_VARS(tmp, M)