diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 5c12cb09a56..992faccb6bb 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -834,55 +834,6 @@ void ColumnString::serializeToPosForCmpColumnArray( } } -void ColumnString::serializeToPosForCmpColumnArray( - PaddedPODArray & pos, - size_t start, - size_t length, - bool has_null, - const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const -{ - if (has_null) - { - if likely (collator != nullptr) - serializeToPosForColumnArrayImplType( - pos, - start, - length, - array_offsets, - collator, - sort_key_container); - else - serializeToPosForColumnArrayImplType( - pos, - start, - length, - array_offsets, - nullptr, - nullptr); - } - else - { - if likely (collator != nullptr) - serializeToPosForColumnArrayImplType( - pos, - start, - length, - array_offsets, - collator, - sort_key_container); - else - serializeToPosForColumnArrayImplType( - pos, - start, - length, - array_offsets, - nullptr, - nullptr); - } -} - void ColumnString::serializeToPosForColumnArray( PaddedPODArray & pos, size_t start, @@ -1049,92 +1000,89 @@ void ColumnString::deserializeAndInsertFromPosImpl( size_t size = pos.size(); #ifdef TIFLASH_ENABLE_AVX_SUPPORT - if constexpr (!add_terminating_zero) + if (use_nt_align_buffer) { - if (use_nt_align_buffer) + bool is_offset_aligned = reinterpret_cast(&offsets[prev_size]) % FULL_VECTOR_SIZE_AVX2 == 0; + bool is_char_aligned = reinterpret_cast(&chars[char_size]) % FULL_VECTOR_SIZE_AVX2 == 0; + if likely (is_offset_aligned && is_char_aligned) { - bool is_offset_aligned = reinterpret_cast(&offsets[prev_size]) % FULL_VECTOR_SIZE_AVX2 == 0; - bool is_char_aligned = reinterpret_cast(&chars[char_size]) % FULL_VECTOR_SIZE_AVX2 == 0; - if likely (is_offset_aligned && is_char_aligned) - { - if unlikely (align_buffer_ptrs == nullptr) - align_buffer_ptrs = std::make_unique(2); + if unlikely (align_buffer_ptrs == nullptr) + align_buffer_ptrs = std::make_unique(2); - NTAlignBufferAVX2 & saved_char_buffer = align_buffer_ptrs[0].getBuffer(); - UInt8 char_buffer_size = align_buffer_ptrs[0].getSize(); - NTAlignBufferAVX2 & offset_buffer = align_buffer_ptrs[1].getBuffer(); - UInt8 offset_buffer_size = align_buffer_ptrs[1].getSize(); + NTAlignBufferAVX2 & saved_char_buffer = align_buffer_ptrs[0].getBuffer(); + UInt8 char_buffer_size = align_buffer_ptrs[0].getSize(); + NTAlignBufferAVX2 & offset_buffer = align_buffer_ptrs[1].getBuffer(); + UInt8 offset_buffer_size = align_buffer_ptrs[1].getSize(); - /// Add 15 bytes padding in order to use memcpyMax64BAllowReadWriteOverflow15 - struct PaddedNTAlignBuffer - { - NTAlignBufferAVX2 buffer; - char padding[15]{}; - } padded_align_buf; + /// Add 15 bytes padding in order to use memcpyMax64BAllowReadWriteOverflow15 + struct PaddedNTAlignBuffer + { + NTAlignBufferAVX2 buffer; + char padding[15]{}; + } padded_align_buf; - NTAlignBufferAVX2 & char_buffer = padded_align_buf.buffer; + NTAlignBufferAVX2 & char_buffer = padded_align_buf.buffer; - tiflash_compiler_builtin_memcpy(&char_buffer, &saved_char_buffer, sizeof(NTAlignBufferAVX2)); - SCOPE_EXIT({ - tiflash_compiler_builtin_memcpy(&saved_char_buffer, &char_buffer, sizeof(NTAlignBufferAVX2)); - align_buffer_ptrs[0].setSize(char_buffer_size); - align_buffer_ptrs[1].setSize(offset_buffer_size); - }); + tiflash_compiler_builtin_memcpy(&char_buffer, &saved_char_buffer, sizeof(NTAlignBufferAVX2)); + SCOPE_EXIT({ + tiflash_compiler_builtin_memcpy(&saved_char_buffer, &char_buffer, sizeof(NTAlignBufferAVX2)); + align_buffer_ptrs[0].setSize(char_buffer_size); + align_buffer_ptrs[1].setSize(offset_buffer_size); + }); - offsets.reserve(offsets.size() + size + offset_buffer_size / sizeof(size_t)); - for (size_t i = 0; i < size; ++i) - { - UInt32 str_size; - tiflash_compiler_builtin_memcpy(&str_size, pos[i], sizeof(UInt32)); - pos[i] += sizeof(UInt32); + offsets.reserve(offsets.size() + size + offset_buffer_size / sizeof(size_t)); + for (size_t i = 0; i < size; ++i) + { + UInt32 str_size; + tiflash_compiler_builtin_memcpy(&str_size, pos[i], sizeof(UInt32)); + pos[i] += sizeof(UInt32); - auto * p = pos[i]; - while (true) + auto * p = pos[i]; + while (true) + { + UInt8 remain = FULL_VECTOR_SIZE_AVX2 - char_buffer_size; + if (remain > str_size) { - UInt8 remain = FULL_VECTOR_SIZE_AVX2 - char_buffer_size; - if (remain > str_size) - { - memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, str_size); - p += str_size; - char_buffer_size += str_size; - break; - } - - memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, remain); - p += remain; - chars.resize(char_size + FULL_VECTOR_SIZE_AVX2, FULL_VECTOR_SIZE_AVX2); - nonTemporalStore64B(&chars[char_size], char_buffer); - char_size += FULL_VECTOR_SIZE_AVX2; - char_buffer_size = 0; - if (remain == str_size) - break; - str_size -= remain; + memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, str_size); + p += str_size; + char_buffer_size += str_size; + break; } - pos[i] = p; - size_t offset = char_size + char_buffer_size; - tiflash_compiler_builtin_memcpy(&offset_buffer.data[offset_buffer_size], &offset, sizeof(size_t)); - offset_buffer_size += sizeof(size_t); - static_assert(FULL_VECTOR_SIZE_AVX2 % sizeof(size_t) == 0); - if unlikely (offset_buffer_size == FULL_VECTOR_SIZE_AVX2) - { - offsets.resize(prev_size + FULL_VECTOR_SIZE_AVX2 / sizeof(size_t), FULL_VECTOR_SIZE_AVX2); - nonTemporalStore64B(&offsets[prev_size], offset_buffer); - prev_size += FULL_VECTOR_SIZE_AVX2 / sizeof(size_t); - offset_buffer_size = 0; - } + memcpyMax64BAllowReadWriteOverflow15(&char_buffer.data[char_buffer_size], p, remain); + p += remain; + chars.resize(char_size + FULL_VECTOR_SIZE_AVX2, FULL_VECTOR_SIZE_AVX2); + nonTemporalStore64B(&chars[char_size], char_buffer); + char_size += FULL_VECTOR_SIZE_AVX2; + char_buffer_size = 0; + if (remain == str_size) + break; + str_size -= remain; } + pos[i] = p; - _mm_sfence(); - return; + size_t offset = char_size + char_buffer_size; + tiflash_compiler_builtin_memcpy(&offset_buffer.data[offset_buffer_size], &offset, sizeof(size_t)); + offset_buffer_size += sizeof(size_t); + static_assert(FULL_VECTOR_SIZE_AVX2 % sizeof(size_t) == 0); + if unlikely (offset_buffer_size == FULL_VECTOR_SIZE_AVX2) + { + offsets.resize(prev_size + FULL_VECTOR_SIZE_AVX2 / sizeof(size_t), FULL_VECTOR_SIZE_AVX2); + nonTemporalStore64B(&offsets[prev_size], offset_buffer); + prev_size += FULL_VECTOR_SIZE_AVX2 / sizeof(size_t); + offset_buffer_size = 0; + } } + + _mm_sfence(); + return; } + } RUNTIME_CHECK_MSG( align_buffer_ptrs == nullptr, "align_buffer_ptrs is not nullptr but use_nt_align_buffer({}) is false or data is unaligned", use_nt_align_buffer); - } #endif offsets.resize(prev_size + size); @@ -1143,20 +1091,9 @@ void ColumnString::deserializeAndInsertFromPosImpl( UInt32 str_size; tiflash_compiler_builtin_memcpy(&str_size, pos[i], sizeof(UInt32)); pos[i] += sizeof(UInt32); - - if constexpr (add_terminating_zero) - chars.resize(char_size + str_size + 1); - else - chars.resize(char_size + str_size); - + chars.resize(char_size + str_size); memcpySmallAllowReadWriteOverflow15(&chars[char_size], pos[i], str_size); char_size += str_size; - - if constexpr (add_terminating_zero) - { - chars[char_size] = 0; - char_size++; - } offsets[prev_size + i] = char_size; pos[i] += str_size; }