Skip to content

Commit

Permalink
refine
Browse files: browse the repository at this point in the history
Signed-off-by: guo-shaoge <[email protected]>
  • Loading branch information
guo-shaoge committed Jan 22, 2025
1 parent 19c3a84 commit 30a5b1c
Showing 1 changed file with 64 additions and 127 deletions.
191 changes: 64 additions & 127 deletions dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -834,55 +834,6 @@ void ColumnString::serializeToPosForCmpColumnArray(
}
}

/// Dispatches to the matching serializeToPosForColumnArrayImplType<has_null, has_collator>
/// instantiation, chosen from the runtime values of `has_null` and `collator`.
///
/// @param pos                 forwarded unchanged to the implementation.
/// @param start               forwarded unchanged to the implementation.
/// @param length              forwarded unchanged to the implementation.
/// @param has_null            selects the `has_null` template argument.
/// @param array_offsets       forwarded unchanged to the implementation.
/// @param collator            when non-null, the `has_collator=true` instantiation is used and
///                            the collator is forwarded together with `sort_key_container`.
/// @param sort_key_container  only forwarded when `collator` is non-null; otherwise nullptr is
///                            passed for both the collator and the container.
void ColumnString::serializeToPosForCmpColumnArray(
    PaddedPODArray<char *> & pos,
    size_t start,
    size_t length,
    bool has_null,
    const IColumn::Offsets & array_offsets,
    const TiDB::TiDBCollatorPtr & collator,
    String * sort_key_container) const
{
    if (has_null)
    {
        if likely (collator != nullptr)
            serializeToPosForColumnArrayImplType</*has_null=*/true, /*has_collator=*/true>(
                pos,
                start,
                length,
                array_offsets,
                collator,
                sort_key_container);
        else
            serializeToPosForColumnArrayImplType</*has_null=*/true, /*has_collator=*/false>(
                pos,
                start,
                length,
                array_offsets,
                nullptr,
                nullptr);
    }
    else
    {
        if likely (collator != nullptr)
            serializeToPosForColumnArrayImplType</*has_null=*/false, /*has_collator=*/true>(
                pos,
                start,
                length,
                array_offsets,
                collator,
                sort_key_container);
        else
            // No collator is available on this path, so has_collator must be false:
            // both the collator and the sort key container are passed as nullptr,
            // mirroring the has_null == true branch above.
            serializeToPosForColumnArrayImplType</*has_null=*/false, /*has_collator=*/false>(
                pos,
                start,
                length,
                array_offsets,
                nullptr,
                nullptr);
    }
}

void ColumnString::serializeToPosForColumnArray(
PaddedPODArray<char *> & pos,
size_t start,
Expand Down Expand Up @@ -1049,92 +1000,89 @@ void ColumnString::deserializeAndInsertFromPosImpl(
size_t size = pos.size();

#ifdef TIFLASH_ENABLE_AVX_SUPPORT
if constexpr (!add_terminating_zero)
if (use_nt_align_buffer)
{
if (use_nt_align_buffer)
bool is_offset_aligned = reinterpret_cast<std::uintptr_t>(&offsets[prev_size]) % FULL_VECTOR_SIZE_AVX2 == 0;
bool is_char_aligned = reinterpret_cast<std::uintptr_t>(&chars[char_size]) % FULL_VECTOR_SIZE_AVX2 == 0;
if likely (is_offset_aligned && is_char_aligned)
{
bool is_offset_aligned = reinterpret_cast<std::uintptr_t>(&offsets[prev_size]) % FULL_VECTOR_SIZE_AVX2 == 0;
bool is_char_aligned = reinterpret_cast<std::uintptr_t>(&chars[char_size]) % FULL_VECTOR_SIZE_AVX2 == 0;
if likely (is_offset_aligned && is_char_aligned)
{
if unlikely (align_buffer_ptrs == nullptr)
align_buffer_ptrs = std::make_unique<ColumnNTAlignBufferAVX2[]>(2);
if unlikely (align_buffer_ptrs == nullptr)
align_buffer_ptrs = std::make_unique<ColumnNTAlignBufferAVX2[]>(2);

NTAlignBufferAVX2 & saved_char_buffer = align_buffer_ptrs[0].getBuffer();
UInt8 char_buffer_size = align_buffer_ptrs[0].getSize();
NTAlignBufferAVX2 & offset_buffer = align_buffer_ptrs[1].getBuffer();
UInt8 offset_buffer_size = align_buffer_ptrs[1].getSize();
NTAlignBufferAVX2 & saved_char_buffer = align_buffer_ptrs[0].getBuffer();
UInt8 char_buffer_size = align_buffer_ptrs[0].getSize();
NTAlignBufferAVX2 & offset_buffer = align_buffer_ptrs[1].getBuffer();
UInt8 offset_buffer_size = align_buffer_ptrs[1].getSize();

/// Add 15 bytes padding in order to use memcpyMax64BAllowReadWriteOverflow15
struct PaddedNTAlignBuffer
{
NTAlignBufferAVX2 buffer;
char padding[15]{};
} padded_align_buf;
/// Add 15 bytes padding in order to use memcpyMax64BAllowReadWriteOverflow15
struct PaddedNTAlignBuffer
{
NTAlignBufferAVX2 buffer;
char padding[15]{};
} padded_align_buf;

NTAlignBufferAVX2 & char_buffer = padded_align_buf.buffer;
NTAlignBufferAVX2 & char_buffer = padded_align_buf.buffer;

tiflash_compiler_builtin_memcpy(&char_buffer, &saved_char_buffer, sizeof(NTAlignBufferAVX2));
SCOPE_EXIT({
tiflash_compiler_builtin_memcpy(&saved_char_buffer, &char_buffer, sizeof(NTAlignBufferAVX2));
align_buffer_ptrs[0].setSize(char_buffer_size);
align_buffer_ptrs[1].setSize(offset_buffer_size);
});
tiflash_compiler_builtin_memcpy(&char_buffer, &saved_char_buffer, sizeof(NTAlignBufferAVX2));
SCOPE_EXIT({
tiflash_compiler_builtin_memcpy(&saved_char_buffer, &char_buffer, sizeof(NTAlignBufferAVX2));
align_buffer_ptrs[0].setSize(char_buffer_size);
align_buffer_ptrs[1].setSize(offset_buffer_size);
});

offsets.reserve(offsets.size() + size + offset_buffer_size / sizeof(size_t));
for (size_t i = 0; i < size; ++i)
{
UInt32 str_size;
tiflash_compiler_builtin_memcpy(&str_size, pos[i], sizeof(UInt32));
pos[i] += sizeof(UInt32);
offsets.reserve(offsets.size() + size + offset_buffer_size / sizeof(size_t));
for (size_t i = 0; i < size; ++i)
{
UInt32 str_size;
tiflash_compiler_builtin_memcpy(&str_size, pos[i], sizeof(UInt32));
pos[i] += sizeof(UInt32);

auto * p = pos[i];
while (true)
auto * p = pos[i];
while (true)
{
UInt8 remain = FULL_VECTOR_SIZE_AVX2 - char_buffer_size;
if (remain > str_size)
{
UInt8 remain = FULL_VECTOR_SIZE_AVX2 - char_buffer_size;
if (remain > str_size)
{
memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, str_size);
p += str_size;
char_buffer_size += str_size;
break;
}

memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, remain);
p += remain;
chars.resize(char_size + FULL_VECTOR_SIZE_AVX2, FULL_VECTOR_SIZE_AVX2);
nonTemporalStore64B(&chars[char_size], char_buffer);
char_size += FULL_VECTOR_SIZE_AVX2;
char_buffer_size = 0;
if (remain == str_size)
break;
str_size -= remain;
memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, str_size);
p += str_size;
char_buffer_size += str_size;
break;
}
pos[i] = p;

size_t offset = char_size + char_buffer_size;
tiflash_compiler_builtin_memcpy(&offset_buffer.data[offset_buffer_size], &offset, sizeof(size_t));
offset_buffer_size += sizeof(size_t);
static_assert(FULL_VECTOR_SIZE_AVX2 % sizeof(size_t) == 0);
if unlikely (offset_buffer_size == FULL_VECTOR_SIZE_AVX2)
{
offsets.resize(prev_size + FULL_VECTOR_SIZE_AVX2 / sizeof(size_t), FULL_VECTOR_SIZE_AVX2);
nonTemporalStore64B(&offsets[prev_size], offset_buffer);
prev_size += FULL_VECTOR_SIZE_AVX2 / sizeof(size_t);
offset_buffer_size = 0;
}
memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, remain);
p += remain;
chars.resize(char_size + FULL_VECTOR_SIZE_AVX2, FULL_VECTOR_SIZE_AVX2);
nonTemporalStore64B(&chars[char_size], char_buffer);
char_size += FULL_VECTOR_SIZE_AVX2;
char_buffer_size = 0;
if (remain == str_size)
break;
str_size -= remain;
}
pos[i] = p;

_mm_sfence();
return;
size_t offset = char_size + char_buffer_size;
tiflash_compiler_builtin_memcpy(&offset_buffer.data[offset_buffer_size], &offset, sizeof(size_t));
offset_buffer_size += sizeof(size_t);
static_assert(FULL_VECTOR_SIZE_AVX2 % sizeof(size_t) == 0);
if unlikely (offset_buffer_size == FULL_VECTOR_SIZE_AVX2)
{
offsets.resize(prev_size + FULL_VECTOR_SIZE_AVX2 / sizeof(size_t), FULL_VECTOR_SIZE_AVX2);
nonTemporalStore64B(&offsets[prev_size], offset_buffer);
prev_size += FULL_VECTOR_SIZE_AVX2 / sizeof(size_t);
offset_buffer_size = 0;
}
}

_mm_sfence();
return;
}
}

RUNTIME_CHECK_MSG(
align_buffer_ptrs == nullptr,
"align_buffer_ptrs is not nullptr but use_nt_align_buffer({}) is false or data is unaligned",
use_nt_align_buffer);
}
#endif

offsets.resize(prev_size + size);
Expand All @@ -1143,20 +1091,9 @@ void ColumnString::deserializeAndInsertFromPosImpl(
UInt32 str_size;
tiflash_compiler_builtin_memcpy(&str_size, pos[i], sizeof(UInt32));
pos[i] += sizeof(UInt32);

if constexpr (add_terminating_zero)
chars.resize(char_size + str_size + 1);
else
chars.resize(char_size + str_size);

chars.resize(char_size + str_size);
memcpySmallAllowReadWriteOverflow15(&chars[char_size], pos[i], str_size);
char_size += str_size;

if constexpr (add_terminating_zero)
{
chars[char_size] = 0;
char_size++;
}
offsets[prev_size + i] = char_size;
pos[i] += str_size;
}
Expand Down

0 comments on commit 30a5b1c

Please sign in to comment.