Skip to content

Commit

Permalink
fmt
Browse files (browse the repository at this point in the history)
Signed-off-by: guo-shaoge <[email protected]>
  • Loading branch information
guo-shaoge committed Jan 13, 2025
1 parent 2b32e79 commit 25406de
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 73 deletions.
10 changes: 2 additions & 8 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,14 +304,8 @@ void ColumnArray::serializeToPosImpl(
}

if constexpr (for_compare)
getData().serializeToPosForCmpColumnArray(
pos,
start,
length,
has_null,
getOffsets(),
collator,
sort_key_container);
getData()
.serializeToPosForCmpColumnArray(pos, start, length, has_null, getOffsets(), collator, sort_key_container);
else
getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets());
}
Expand Down
6 changes: 2 additions & 4 deletions dbms/src/Columns/ColumnNullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,8 @@ void ColumnNullable::deserializeForCmpAndInsertFromPosColumnArray(
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator)
{
getNullMapColumn()
.deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator);
getNestedColumn()
.deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator);
getNullMapColumn().deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator);
getNestedColumn().deserializeForCmpAndInsertFromPosColumnArray(pos, array_offsets, use_nt_align_buffer, collator);
}
void ColumnNullable::deserializeAndInsertFromPosForColumnArray(
PaddedPODArray<char *> & pos,
Expand Down
44 changes: 31 additions & 13 deletions dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <DataStreams/ColumnGathererStream.h>
#include <TiDB/Collation/CollatorUtils.h>
#include <TiDB/Collation/Collator.h>
#include <TiDB/Collation/CollatorUtils.h>
#include <common/memcpy.h>
#include <fmt/core.h>

Expand Down Expand Up @@ -560,21 +560,33 @@ void ColumnString::countSerializeByteSizeForCmpColumnArray(
if likely (collator != nullptr)
{
if (collator->maxBytesForOneChar() > 1)
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/true, /*count_code_points=*/true>(byte_size, array_offsets, collator);
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/true, /*count_code_points=*/true>(
byte_size,
array_offsets,
collator);
else
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/true, /*count_code_points=*/false>(byte_size, array_offsets, collator);
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/true, /*count_code_points=*/false>(
byte_size,
array_offsets,
collator);
}
else
{
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/false, /*count_code_points=*/false>(byte_size, array_offsets, nullptr);
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/false, /*count_code_points=*/false>(
byte_size,
array_offsets,
nullptr);
}
}

void ColumnString::countSerializeByteSizeForColumnArray(
PaddedPODArray<size_t> & byte_size,
const IColumn::Offsets & array_offsets) const
{
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/false, /*count_code_points=*/false>(byte_size, array_offsets, nullptr);
countSerializeByteSizeForColumnArrayImpl</*has_collator=*/false, /*count_code_points=*/false>(
byte_size,
array_offsets,
nullptr);
}

template <bool has_collator, bool count_code_points>
Expand Down Expand Up @@ -699,12 +711,12 @@ void ColumnString::serializeToPosImplType(
{
RUNTIME_CHECK(collator && sort_key_container);

#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) \
case (COLLATOR_ID): \
{ \
serializeToPosImpl<has_null, has_collator, REAL_TYPE>(pos, start, length, collator, sort_key_container); \
break; \
}
#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) \
case (COLLATOR_ID): \
{ \
serializeToPosImpl<has_null, has_collator, REAL_TYPE>(pos, start, length, collator, sort_key_container); \
break; \
}

switch (collator->getCollatorId())
{
Expand All @@ -719,7 +731,12 @@ void ColumnString::serializeToPosImplType(
}
else
{
serializeToPosImpl<has_null, has_collator, TiDB::ITiDBCollator>(pos, start, length, collator, sort_key_container);
serializeToPosImpl<has_null, has_collator, TiDB::ITiDBCollator>(
pos,
start,
length,
collator,
sort_key_container);
}
}

Expand Down Expand Up @@ -749,7 +766,8 @@ void ColumnString::serializeToPosImpl(
const void * src = &chars[offsetAt(start + i)];
if constexpr (has_collator)
{
auto sort_key = derived_collator->sortKey(reinterpret_cast<const char *>(src), str_size - 1, *sort_key_container);
auto sort_key
= derived_collator->sortKey(reinterpret_cast<const char *>(src), str_size - 1, *sort_key_container);
str_size = sort_key.size;
src = sort_key.data;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -728,13 +728,7 @@ try
auto col_string = createColumn<String>({"hangzhou", "杭州", "你好世界", "欧元€", "abc里拉₤", "12法郎₣", ""}).column;
testCountSerializeByteSize(
col_string,
{4 + 8,
4 + 6,
4 + 12,
4 + 9,
4 + 12,
4 + 11,
4 + 0},
{4 + 8, 4 + 6, 4 + 12, 4 + 9, 4 + 12, 4 + 11, 4 + 0},
true,
collator_utf8_bin);
testCountSerializeByteSize(
Expand Down
83 changes: 52 additions & 31 deletions dbms/src/TiDB/Collation/Collator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,28 +124,28 @@ bool Pattern<Collator>::match(const char * s, size_t length) const
{
switch (match_types[p_idx])
{
case Match:
if (s_offset < length
&& Collator::regexEq(Collator::decodeChar(s, tmp_s_offset = s_offset), chars[p_idx]))
{
p_idx++;
s_offset = tmp_s_offset;
continue;
}
break;
case One:
if (s_offset < length)
{
p_idx++;
Collator::decodeChar(s, s_offset);
continue;
}
break;
case Any:
next_p_idx = p_idx;
Collator::decodeChar(s, next_s_offset = s_offset);
case Match:
if (s_offset < length
&& Collator::regexEq(Collator::decodeChar(s, tmp_s_offset = s_offset), chars[p_idx]))
{
p_idx++;
s_offset = tmp_s_offset;
continue;
}
break;
case One:
if (s_offset < length)
{
p_idx++;
Collator::decodeChar(s, s_offset);
continue;
}
break;
case Any:
next_p_idx = p_idx;
Collator::decodeChar(s, next_s_offset = s_offset);
p_idx++;
continue;
}
}
if (0 < next_s_offset && next_s_offset <= length)
Expand All @@ -161,11 +161,12 @@ bool Pattern<Collator>::match(const char * s, size_t length) const

template <typename T, bool padding>
inline std::unique_ptr<ITiDBCollator::IPattern> BinCollator<T, padding>::pattern() const
{ return std::make_unique<Pattern<BinCollator<T, padding>>>(); }
{
return std::make_unique<Pattern<BinCollator<T, padding>>>();
}

template <typename T, bool padding>
inline typename BinCollator<T, padding>::CharType
BinCollator<T, padding>::decodeChar(const char * s, size_t & offset)
inline typename BinCollator<T, padding>::CharType BinCollator<T, padding>::decodeChar(const char * s, size_t & offset)
{
if constexpr (std::is_same_v<T, char>)
{
Expand Down Expand Up @@ -198,7 +199,11 @@ int GeneralCICollator::compare(const char * s1, size_t length1, const char * s2,
}

template <bool need_len, bool need_trim>
StringRef GeneralCICollator::convertImpl(const char * s, size_t length, std::string & container, std::vector<size_t> * lens) const
StringRef GeneralCICollator::convertImpl(
const char * s,
size_t length,
std::string & container,
std::vector<size_t> * lens) const
{
std::string_view v;

Expand Down Expand Up @@ -235,7 +240,10 @@ StringRef GeneralCICollator::convertImpl(const char * s, size_t length, std::str
return StringRef(container.data(), total_size);
}

inline GeneralCICollator::CharType GeneralCICollator::decodeChar(const char * s, size_t & offset) { return decodeUtf8Char(s, offset); }
inline GeneralCICollator::CharType GeneralCICollator::decodeChar(const char * s, size_t & offset)
{
return decodeUtf8Char(s, offset);
}

namespace UnicodeCI
{
Expand Down Expand Up @@ -357,7 +365,11 @@ int UCACICollator<T, padding>::compare(const char * s1, size_t length1, const ch

template <typename T, bool padding>
template <bool need_len, bool need_trim>
StringRef UCACICollator<T, padding>::convertImpl(const char * s, size_t length, std::string & container, std::vector<size_t> * lens) const
StringRef UCACICollator<T, padding>::convertImpl(
const char * s,
size_t length,
std::string & container,
std::vector<size_t> * lens) const
{
std::string_view v;

Expand Down Expand Up @@ -403,10 +415,20 @@ StringRef UCACICollator<T, padding>::convertImpl(const char * s, size_t length,
}

template <typename T, bool padding>
inline typename UCACICollator<T, padding>::CharType UCACICollator<T, padding>::decodeChar(const char * s, size_t & offset) { return decodeUtf8Char(s, offset); }
inline typename UCACICollator<T, padding>::CharType UCACICollator<T, padding>::decodeChar(
const char * s,
size_t & offset)
{
return decodeUtf8Char(s, offset);
}

template <typename T, bool padding>
inline void UCACICollator<T, padding>::weight(uint64_t & first, uint64_t & second, size_t & offset, size_t length, const char * s)
inline void UCACICollator<T, padding>::weight(
uint64_t & first,
uint64_t & second,
size_t & offset,
size_t length,
const char * s)
{
if (first == 0)
{
Expand Down Expand Up @@ -683,8 +705,7 @@ struct TiDBCollatorPtrMap

TiDBCollatorPtrMap()
{
#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) \
static const auto VAR_NAME = REAL_TYPE(COLLATOR_ID);
#define M(VAR_NAME, REAL_TYPE, COLLATOR_ID) static const auto VAR_NAME = REAL_TYPE(COLLATOR_ID);

APPLY_FOR_COLLATOR_TYPES_WITH_VARS(tmp_var, M)
#undef M
Expand All @@ -695,7 +716,7 @@ struct TiDBCollatorPtrMap
#define M(name) \
do \
{ \
auto & collator = (tmp_var_##name); \
auto & collator = (tmp_var_##name); \
id_map[collator.getCollatorId()] = &collator; \
addr_to_type[&collator] = collator.getCollatorType(); \
name_map[#name] = &collator; \
Expand Down
20 changes: 10 additions & 10 deletions dbms/src/TiDB/Collation/Collator.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,17 +407,17 @@ using BIN_COLLATOR_PADDING = BinCollator<char, true>;
using BIN_COLLATOR_NON_PADDING = BinCollator<char, false>;
} // namespace TiDB

#define APPLY_FOR_COLLATOR_TYPES_WITH_VARS(VAR_PREFIX, M) \
M(VAR_PREFIX##_utf8_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8_GENERAL_CI) \
M(VAR_PREFIX##_utf8mb4_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI) \
M(VAR_PREFIX##_utf8_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8_UNICODE_CI) \
M(VAR_PREFIX##_utf8mb4_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8MB4_UNICODE_CI) \
#define APPLY_FOR_COLLATOR_TYPES_WITH_VARS(VAR_PREFIX, M) \
M(VAR_PREFIX##_utf8_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8_GENERAL_CI) \
M(VAR_PREFIX##_utf8mb4_general_ci, TiDB::GeneralCICollator, TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI) \
M(VAR_PREFIX##_utf8_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8_UNICODE_CI) \
M(VAR_PREFIX##_utf8mb4_unicode_ci, TiDB::UCACI_0400_PADDING, TiDB::ITiDBCollator::UTF8MB4_UNICODE_CI) \
M(VAR_PREFIX##_utf8mb4_0900_ai_ci, TiDB::UCACI_0900_NON_PADDING, TiDB::ITiDBCollator::UTF8MB4_0900_AI_CI) \
M(VAR_PREFIX##_utf8mb4_0900_bin, TiDB::UTF8MB4_0900_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_0900_BIN) \
M(VAR_PREFIX##_utf8mb4_bin, TiDB::UTF8MB4_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_BIN) \
M(VAR_PREFIX##_latin1_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::LATIN1_BIN) \
M(VAR_PREFIX##_binary, TiDB::BIN_COLLATOR_NON_PADDING, TiDB::ITiDBCollator::BINARY) \
M(VAR_PREFIX##_ascii_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::ASCII_BIN) \
M(VAR_PREFIX##_utf8mb4_0900_bin, TiDB::UTF8MB4_0900_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_0900_BIN) \
M(VAR_PREFIX##_utf8mb4_bin, TiDB::UTF8MB4_BIN_TYPE, TiDB::ITiDBCollator::UTF8MB4_BIN) \
M(VAR_PREFIX##_latin1_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::LATIN1_BIN) \
M(VAR_PREFIX##_binary, TiDB::BIN_COLLATOR_NON_PADDING, TiDB::ITiDBCollator::BINARY) \
M(VAR_PREFIX##_ascii_bin, TiDB::BIN_COLLATOR_PADDING, TiDB::ITiDBCollator::ASCII_BIN) \
M(VAR_PREFIX##_utf8_bin, TiDB::UTF8MB4_BIN_TYPE, TiDB::ITiDBCollator::UTF8_BIN)

#define APPLY_FOR_COLLATOR_TYPES(M) APPLY_FOR_COLLATOR_TYPES_WITH_VARS(tmp, M)

0 comments on commit 25406de

Please sign in to comment.