Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support comparison semantics for batch serialize/deserialize of Column #9756

Open
wants to merge 31 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e4b57c8
basically done
guo-shaoge Dec 30, 2024
1574825
fix compilation
guo-shaoge Dec 31, 2024
4203af0
fmt
guo-shaoge Dec 31, 2024
29021b2
compile && nt_optimization
guo-shaoge Dec 31, 2024
a3cd638
unit test
guo-shaoge Jan 1, 2025
d155847
refine
guo-shaoge Jan 1, 2025
d3b0300
fix
guo-shaoge Jan 2, 2025
e8564aa
Merge branch 'master' into batch_serialize
guo-shaoge Jan 2, 2025
abd55ac
test new impl
guo-shaoge Jan 3, 2025
4cac26a
Merge branch 'batch_serialize' of github.com:guo-shaoge/tiflash into …
guo-shaoge Jan 3, 2025
c07d13a
test ci impl
guo-shaoge Jan 3, 2025
086b630
Revert "test ci impl"
guo-shaoge Jan 6, 2025
db8d490
Revert "test new impl"
guo-shaoge Jan 6, 2025
84ee65b
change name
guo-shaoge Jan 6, 2025
3800d0f
is_fast -> ensure_unique
guo-shaoge Jan 6, 2025
a6fac1f
batchSerializeImpl -> serializeToPosImpl
guo-shaoge Jan 6, 2025
19982d3
ci
guo-shaoge Jan 6, 2025
47cdf91
refine
guo-shaoge Jan 7, 2025
1342f6a
Merge branch 'master' of github.com:pingcap/tiflash into batch_serialize
guo-shaoge Jan 8, 2025
2a6a5f5
refine
guo-shaoge Jan 10, 2025
7d910e5
refine
guo-shaoge Jan 10, 2025
6a2b333
refine
guo-shaoge Jan 10, 2025
82059d6
tmp save
guo-shaoge Jan 13, 2025
cff4dc2
sortKey no virtual function call
guo-shaoge Jan 13, 2025
2b32e79
avoid sortKey virtual function call
guo-shaoge Jan 13, 2025
25406de
fmt
guo-shaoge Jan 13, 2025
a9f86b0
refine
guo-shaoge Jan 13, 2025
4c6f931
refine
guo-shaoge Jan 14, 2025
5283e25
fix
guo-shaoge Jan 17, 2025
8ee2a63
refine
guo-shaoge Jan 17, 2025
32a9e9d
refine
guo-shaoge Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions dbms/src/Columns/ColumnAggregateFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,27 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega

const char * deserializeAndInsertFromArena(const char * src_arena, const TiDB::TiDBCollatorPtr &) override;

/** Comparison-aware byte-size counting is not available for this column type.
  * Both arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void countSerializeByteSizeForCmp(
    PaddedPODArray<size_t> & /* byte_size */,
    const TiDB::TiDBCollatorPtr & /* collator */) const override
{
    // Build the message first so the throw statement stays short.
    const auto message = "Method countSerializeByteSizeForCmp is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
/** Byte-size counting for batch serialization is not available for this column type.
  * The argument is ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void countSerializeByteSize(PaddedPODArray<size_t> & /* byte_size */) const override
{
    const auto message = "Method countSerializeByteSize is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
/** Comparison-aware byte-size counting on behalf of an enclosing array column
  * is not available for this column type.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void countSerializeByteSizeForCmpColumnArray(
    PaddedPODArray<size_t> & /* byte_size */,
    const IColumn::Offsets & /* offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */) const override
{
    const auto message = "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void countSerializeByteSizeForColumnArray(
PaddedPODArray<size_t> & /* byte_size */,
const IColumn::Offsets & /* offsets */) const override
Expand All @@ -178,6 +195,16 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
ErrorCodes::NOT_IMPLEMENTED);
}

/** Comparison-aware batch serialization is not available for this column type.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void serializeToPosForCmp(
    PaddedPODArray<char *> & /* pos */,
    size_t /* start */,
    size_t /* length */,
    bool /* has_null */,
    const TiDB::TiDBCollatorPtr & /* collator */,
    String * /* sort_key_container */) const override
{
    const auto message = "Method serializeToPosForCmp is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void serializeToPos(
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
Expand All @@ -186,6 +213,19 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
{
throw Exception("Method serializeToPos is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/** Comparison-aware batch serialization on behalf of an enclosing array column
  * is not available for this column type.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void serializeToPosForCmpColumnArray(
    PaddedPODArray<char *> & /* pos */,
    size_t /* start */,
    size_t /* length */,
    bool /* has_null */,
    const IColumn::Offsets & /* offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */,
    String * /* sort_key_container */) const override
{
    const auto message = "Method serializeToPosForCmpColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void serializeToPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
Expand All @@ -198,12 +238,32 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
ErrorCodes::NOT_IMPLEMENTED);
}

/** Comparison-aware batch deserialization is not available for this column type.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void deserializeForCmpAndInsertFromPos(
    PaddedPODArray<char *> & /* pos */,
    bool /* use_nt_align_buffer */,
    const TiDB::TiDBCollatorPtr & /* collator */) override
{
    const auto message = "Method deserializeForCmpAndInsertFromPos is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
/** Batch deserialization is not available for this column type.
  * Both arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void deserializeAndInsertFromPos(PaddedPODArray<char *> & /* pos */, bool /* use_nt_align_buffer */) override
{
    const auto message = "Method deserializeAndInsertFromPos is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}

/** Comparison-aware batch deserialization on behalf of an enclosing array column
  * is not available for this column type.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void deserializeForCmpAndInsertFromPosColumnArray(
    PaddedPODArray<char *> & /* pos */,
    const IColumn::Offsets & /* array_offsets */,
    bool /* use_nt_align_buffer */,
    const TiDB::TiDBCollatorPtr & /* collator */) override
{
    const auto message = "Method deserializeForCmpAndInsertFromPosColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void deserializeAndInsertFromPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
const IColumn::Offsets & /* array_offsets */,
Expand Down
74 changes: 67 additions & 7 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,21 @@ const char * ColumnArray::deserializeAndInsertFromArena(const char * pos, const
return pos;
}

/** Comparison-aware entry point for per-row byte-size counting.
  * Forwards to countSerializeByteSizeImpl with for_compare = true and hands the
  * collator through unchanged.
  */
void ColumnArray::countSerializeByteSizeForCmp(
    PaddedPODArray<size_t> & byte_size,
    const TiDB::TiDBCollatorPtr & collator) const
{
    constexpr bool for_compare = true;
    countSerializeByteSizeImpl<for_compare>(byte_size, collator);
}

/** Plain (non-comparison) entry point for per-row byte-size counting.
  * Forwards to countSerializeByteSizeImpl with for_compare = false; no collator
  * is supplied on this path.
  */
void ColumnArray::countSerializeByteSize(PaddedPODArray<size_t> & byte_size) const
{
    constexpr bool for_compare = false;
    countSerializeByteSizeImpl<for_compare>(byte_size, /* collator */ nullptr);
}

template <bool for_compare>
void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray<size_t> & byte_size, const TiDB::TiDBCollatorPtr & collator)
const
{
RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size());

Expand All @@ -237,19 +251,41 @@ void ColumnArray::countSerializeByteSize(PaddedPODArray<size_t> & byte_size) con
for (size_t i = 0; i < size; ++i)
byte_size[i] += sizeof(UInt32);

getData().countSerializeByteSizeForColumnArray(byte_size, getOffsets());
if constexpr (for_compare)
getData().countSerializeByteSizeForCmpColumnArray(byte_size, getOffsets(), collator);
else
getData().countSerializeByteSizeForColumnArray(byte_size, getOffsets());
}

/** Comparison-aware batch serialization of rows [start, start + length).
  * Turns the runtime has_null flag into the has_null template argument of
  * serializeToPosImpl, always with for_compare = true; the collator and
  * sort_key_container are passed through unchanged.
  */
void ColumnArray::serializeToPosForCmp(
    PaddedPODArray<char *> & pos,
    size_t start,
    size_t length,
    bool has_null,
    const TiDB::TiDBCollatorPtr & collator,
    String * sort_key_container) const
{
    constexpr bool for_compare = true;

    if (!has_null)
    {
        serializeToPosImpl<false, for_compare>(pos, start, length, collator, sort_key_container);
        return;
    }

    serializeToPosImpl<true, for_compare>(pos, start, length, collator, sort_key_container);
}

void ColumnArray::serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const
{
if (has_null)
serializeToPosImpl<true>(pos, start, length);
serializeToPosImpl<true, false>(pos, start, length, nullptr, nullptr);
else
serializeToPosImpl<false>(pos, start, length);
serializeToPosImpl<false, false>(pos, start, length, nullptr, nullptr);
}

template <bool has_null>
void ColumnArray::serializeToPosImpl(PaddedPODArray<char *> & pos, size_t start, size_t length) const
template <bool has_null, bool for_compare>
void ColumnArray::serializeToPosImpl(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const
{
RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size());
RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size());
Expand All @@ -267,10 +303,31 @@ void ColumnArray::serializeToPosImpl(PaddedPODArray<char *> & pos, size_t start,
pos[i] += sizeof(UInt32);
}

getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets());
if constexpr (for_compare)
getData()
.serializeToPosForCmpColumnArray(pos, start, length, has_null, getOffsets(), collator, sort_key_container);
else
getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets());
}

/** Comparison-aware batch deserialization entry point.
  * Forwards to deserializeAndInsertFromPosImpl with for_compare = true, passing
  * use_nt_align_buffer and the collator through unchanged.
  */
void ColumnArray::deserializeForCmpAndInsertFromPos(
    PaddedPODArray<char *> & pos,
    bool use_nt_align_buffer,
    const TiDB::TiDBCollatorPtr & collator)
{
    constexpr bool for_compare = true;
    deserializeAndInsertFromPosImpl<for_compare>(pos, use_nt_align_buffer, collator);
}

/** Plain (non-comparison) batch deserialization entry point.
  * Forwards to deserializeAndInsertFromPosImpl with for_compare = false; no
  * collator is supplied on this path.
  */
void ColumnArray::deserializeAndInsertFromPos(PaddedPODArray<char *> & pos, bool use_nt_align_buffer)
{
    constexpr bool for_compare = false;
    deserializeAndInsertFromPosImpl<for_compare>(pos, use_nt_align_buffer, /* collator */ nullptr);
}

template <bool for_compare>
void ColumnArray::deserializeAndInsertFromPosImpl(
PaddedPODArray<char *> & pos,
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator)
{
auto & offsets = getOffsets();
size_t prev_size = offsets.size();
Expand All @@ -285,7 +342,10 @@ void ColumnArray::deserializeAndInsertFromPos(PaddedPODArray<char *> & pos, bool
pos[i] += sizeof(UInt32);
}

getData().deserializeAndInsertFromPosForColumnArray(pos, offsets, use_nt_align_buffer);
if constexpr (for_compare)
getData().deserializeForCmpAndInsertFromPosColumnArray(pos, offsets, use_nt_align_buffer, collator);
else
getData().deserializeAndInsertFromPosForColumnArray(pos, offsets, use_nt_align_buffer);
}

void ColumnArray::flushNTAlignBuffer()
Expand Down
66 changes: 64 additions & 2 deletions dbms/src/Columns/ColumnArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,23 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>

ColumnArray(const ColumnArray &) = default;

/// Shared implementation behind countSerializeByteSize (for_compare = false, collator = nullptr)
/// and countSerializeByteSizeForCmp (for_compare = true, caller's collator).
template <bool for_compare>
void countSerializeByteSizeImpl(PaddedPODArray<size_t> & byte_size, const TiDB::TiDBCollatorPtr & collator) const;

/// Shared implementation behind serializeToPos and serializeToPosForCmp.
/// has_null mirrors the runtime flag of the public entry points; for_compare selects
/// whether the nested column is serialized through its ForCmp variant.
template <bool has_null, bool for_compare>
void serializeToPosImpl(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const;

/// Shared implementation behind deserializeAndInsertFromPos (for_compare = false, collator = nullptr)
/// and deserializeForCmpAndInsertFromPos (for_compare = true, caller's collator).
template <bool for_compare>
void deserializeAndInsertFromPosImpl(
PaddedPODArray<char *> & pos,
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator);

public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
Expand Down Expand Up @@ -82,7 +99,19 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
String &) const override;
const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override;

/// Comparison-aware variant: forwards to countSerializeByteSizeImpl<true> with the given collator.
void countSerializeByteSizeForCmp(PaddedPODArray<size_t> & byte_size, const TiDB::TiDBCollatorPtr & collator)
const override;
/// Plain variant: forwards to countSerializeByteSizeImpl<false> without a collator.
void countSerializeByteSize(PaddedPODArray<size_t> & byte_size) const override;

/** Comparison-aware byte-size counting on behalf of an enclosing array column
  * is not available for ColumnArray itself.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void countSerializeByteSizeForCmpColumnArray(
    PaddedPODArray<size_t> & /* byte_size */,
    const IColumn::Offsets & /* array_offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */) const override
{
    const auto message = "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void countSerializeByteSizeForColumnArray(
PaddedPODArray<size_t> & /* byte_size */,
const IColumn::Offsets & /* array_offsets */) const override
Expand All @@ -92,10 +121,28 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
ErrorCodes::NOT_IMPLEMENTED);
}

/// Comparison-aware batch serialization of rows [start, start + length).
/// Dispatches on has_null to serializeToPosImpl<has_null, true>, forwarding the
/// collator and sort_key_container.
void serializeToPosForCmp(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
bool has_null,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const override;
/// Plain (non-comparison) counterpart of serializeToPosForCmp; takes no collator.
void serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const override;
template <bool has_null>
void serializeToPosImpl(PaddedPODArray<char *> & pos, size_t start, size_t length) const;

/** Comparison-aware batch serialization on behalf of an enclosing array column
  * is not available for ColumnArray itself.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void serializeToPosForCmpColumnArray(
    PaddedPODArray<char *> & /* pos */,
    size_t /* start */,
    size_t /* length */,
    bool /* has_null */,
    const IColumn::Offsets & /* array_offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */,
    String * /* sort_key_container */) const override
{
    const auto message = "Method serializeToPosForCmpColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void serializeToPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
Expand All @@ -108,7 +155,22 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
ErrorCodes::NOT_IMPLEMENTED);
}

/// Comparison-aware batch deserialization: forwards to deserializeAndInsertFromPosImpl<true>
/// with the given collator.
void deserializeForCmpAndInsertFromPos(
PaddedPODArray<char *> & pos,
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator) override;
/// Plain variant: forwards to deserializeAndInsertFromPosImpl<false> without a collator.
void deserializeAndInsertFromPos(PaddedPODArray<char *> & pos, bool use_nt_align_buffer) override;

/** Comparison-aware batch deserialization on behalf of an enclosing array column
  * is not available for ColumnArray itself.
  * All arguments are ignored; every call throws an Exception carrying
  * ErrorCodes::NOT_IMPLEMENTED.
  */
void deserializeForCmpAndInsertFromPosColumnArray(
    PaddedPODArray<char *> & /* pos */,
    const IColumn::Offsets & /* array_offsets */,
    bool /* use_nt_align_buffer */,
    const TiDB::TiDBCollatorPtr & /* collator */) override
{
    const auto message = "Method deserializeForCmpAndInsertFromPosColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void deserializeAndInsertFromPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
const IColumn::Offsets & /* array_offsets */,
Expand Down
Loading