Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support comparison semantics for batch serialize/deserialize of Column #9756

Merged
merged 33 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e4b57c8
basically done
guo-shaoge Dec 30, 2024
1574825
fix compilation
guo-shaoge Dec 31, 2024
4203af0
fmt
guo-shaoge Dec 31, 2024
29021b2
compile && nt_optimization
guo-shaoge Dec 31, 2024
a3cd638
unit test
guo-shaoge Jan 1, 2025
d155847
refine
guo-shaoge Jan 1, 2025
d3b0300
fix
guo-shaoge Jan 2, 2025
e8564aa
Merge branch 'master' into batch_serialize
guo-shaoge Jan 2, 2025
abd55ac
test new impl
guo-shaoge Jan 3, 2025
4cac26a
Merge branch 'batch_serialize' of github.com:guo-shaoge/tiflash into …
guo-shaoge Jan 3, 2025
c07d13a
test ci impl
guo-shaoge Jan 3, 2025
086b630
Revert "test ci impl"
guo-shaoge Jan 6, 2025
db8d490
Revert "test new impl"
guo-shaoge Jan 6, 2025
84ee65b
change name
guo-shaoge Jan 6, 2025
3800d0f
is_fast -> ensure_unique
guo-shaoge Jan 6, 2025
a6fac1f
batchSerializeImpl -> serializeToPosImpl
guo-shaoge Jan 6, 2025
19982d3
ci
guo-shaoge Jan 6, 2025
47cdf91
refine
guo-shaoge Jan 7, 2025
1342f6a
Merge branch 'master' of github.com:pingcap/tiflash into batch_serialize
guo-shaoge Jan 8, 2025
2a6a5f5
refine
guo-shaoge Jan 10, 2025
7d910e5
refine
guo-shaoge Jan 10, 2025
6a2b333
refine
guo-shaoge Jan 10, 2025
82059d6
tmp save
guo-shaoge Jan 13, 2025
cff4dc2
sortKey no virtual function call
guo-shaoge Jan 13, 2025
2b32e79
avoid sortKey virtual function call
guo-shaoge Jan 13, 2025
25406de
fmt
guo-shaoge Jan 13, 2025
a9f86b0
refine
guo-shaoge Jan 13, 2025
4c6f931
refine
guo-shaoge Jan 14, 2025
5283e25
fix
guo-shaoge Jan 17, 2025
8ee2a63
refine
guo-shaoge Jan 17, 2025
32a9e9d
refine
guo-shaoge Jan 20, 2025
f6c8115
add terminating zero when serialize
guo-shaoge Jan 22, 2025
04cd9f5
Merge branch 'master' into batch_serialize
ti-chi-bot[bot] Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 62 additions & 2 deletions dbms/src/Columns/ColumnAggregateFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,27 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega

const char * deserializeAndInsertFromArena(const char * src_arena, const TiDB::TiDBCollatorPtr &) override;

void countSerializeByteSizeUnique(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make more sense to rename `countSerializeByteSizeUnique` to `countSerializeUniqueByteSize` for better readability?
Similarly, the names of the other methods could follow the same pattern, e.g. `serializeToPosUnique` → `serializeUniqueToPos` and `deserializeAndInsertFromPosUnique` → `deserializeUniqueAndInsertFromPos`.

PaddedPODArray<size_t> & /* byte_size */,
const TiDB::TiDBCollatorPtr & /* collator */) const override
{
throw Exception(
"Method countSerializeByteSizeUnique is not supported for " + getName(),
ErrorCodes::NOT_IMPLEMENTED);
}
/** Not supported by this column type.
  * Unconditionally throws Exception with ErrorCodes::NOT_IMPLEMENTED; the message
  * consists of the method name followed by the result of getName().
  */
void countSerializeByteSize(PaddedPODArray<size_t> & /* byte_size */) const override
{
    const auto message = "Method countSerializeByteSize is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
/** Not supported by this column type.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void countSerializeByteSizeUniqueForColumnArray(
    PaddedPODArray<size_t> & /* byte_size */,
    const IColumn::Offsets & /* offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */) const override
{
    const auto message = "Method countSerializeByteSizeUniqueForColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void countSerializeByteSizeForColumnArray(
PaddedPODArray<size_t> & /* byte_size */,
const IColumn::Offsets & /* offsets */) const override
Expand All @@ -178,6 +195,16 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
ErrorCodes::NOT_IMPLEMENTED);
}

/** Not supported by this column type.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void serializeToPosUnique(
    PaddedPODArray<char *> & /* pos */,
    size_t /* start */,
    size_t /* length */,
    bool /* has_null */,
    const TiDB::TiDBCollatorPtr & /* collator */,
    String * /* sort_key_container */) const override
{
    const auto message = "Method serializeToPosUnique is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void serializeToPos(
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
Expand All @@ -186,6 +213,19 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
{
throw Exception("Method serializeToPos is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/** Not supported by this column type.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void serializeToPosUniqueForColumnArray(
    PaddedPODArray<char *> & /* pos */,
    size_t /* start */,
    size_t /* length */,
    bool /* has_null */,
    const IColumn::Offsets & /* offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */,
    String * /* sort_key_container */) const override
{
    const auto message = "Method serializeToPosUniqueForColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void serializeToPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
Expand All @@ -198,14 +238,34 @@ class ColumnAggregateFunction final : public COWPtrHelper<IColumn, ColumnAggrega
ErrorCodes::NOT_IMPLEMENTED);
}

void deserializeAndInsertFromPos(PaddedPODArray<char *> & /* pos */, bool /* use_nt_align_buffer */) override
/** Not supported by this column type.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void deserializeAndInsertFromPosUnique(
    PaddedPODArray<const char *> & /* pos */,
    bool /* use_nt_align_buffer */,
    const TiDB::TiDBCollatorPtr & /* collator */) override
{
    const auto message = "Method deserializeAndInsertFromPosUnique is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
/** Not supported by this column type.
  * Both arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void deserializeAndInsertFromPos(PaddedPODArray<const char *> & /* pos */, bool /* use_nt_align_buffer */) override
{
    const auto message = "Method deserializeAndInsertFromPos is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}

/** Not supported by this column type.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void deserializeAndInsertFromPosUniqueForColumnArray(
    PaddedPODArray<const char *> & /* pos */,
    const IColumn::Offsets & /* array_offsets */,
    bool /* use_nt_align_buffer */,
    const TiDB::TiDBCollatorPtr & /* collator */) override
{
    const auto message
        = "Method deserializeAndInsertFromPosUniqueForColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void deserializeAndInsertFromPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
PaddedPODArray<const char *> & /* pos */,
const IColumn::Offsets & /* array_offsets */,
bool /* use_nt_align_buffer */) override
{
Expand Down
82 changes: 74 additions & 8 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,21 @@ const char * ColumnArray::deserializeAndInsertFromArena(const char * pos, const
return pos;
}

/// "Unique" entry point for byte-size counting: forwards to the shared
/// implementation with its `ensure_unique` template flag set, passing the
/// caller's collator through unchanged.
void ColumnArray::countSerializeByteSizeUnique(
    PaddedPODArray<size_t> & byte_size,
    const TiDB::TiDBCollatorPtr & collator) const
{
    constexpr bool ensure_unique = true;
    countSerializeByteSizeImpl<ensure_unique>(byte_size, collator);
}

/// Plain (non-"Unique") entry point for byte-size counting: forwards to the
/// shared implementation with its `ensure_unique` template flag cleared and a
/// null collator.
void ColumnArray::countSerializeByteSize(PaddedPODArray<size_t> & byte_size) const
{
    constexpr bool ensure_unique = false;
    countSerializeByteSizeImpl<ensure_unique>(byte_size, nullptr);
}

template <bool ensure_unique>
void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray<size_t> & byte_size, const TiDB::TiDBCollatorPtr & collator)
const
{
RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size());

Expand All @@ -237,19 +251,41 @@ void ColumnArray::countSerializeByteSize(PaddedPODArray<size_t> & byte_size) con
for (size_t i = 0; i < size; ++i)
byte_size[i] += sizeof(UInt32);

getData().countSerializeByteSizeForColumnArray(byte_size, getOffsets());
if constexpr (ensure_unique)
getData().countSerializeByteSizeUniqueForColumnArray(byte_size, getOffsets(), collator);
else
getData().countSerializeByteSizeForColumnArray(byte_size, getOffsets());
}

/// "Unique" entry point for serializing rows [start, start + length) to `pos`.
/// Turns the runtime `has_null` flag into the first template argument of
/// serializeToPosImpl and always sets the second (`ensure_unique`) to true;
/// the collator and sort-key container are forwarded untouched.
void ColumnArray::serializeToPosUnique(
    PaddedPODArray<char *> & pos,
    size_t start,
    size_t length,
    bool has_null,
    const TiDB::TiDBCollatorPtr & collator,
    String * sort_key_container) const
{
    constexpr bool ensure_unique = true;

    if (!has_null)
    {
        serializeToPosImpl<false, ensure_unique>(pos, start, length, collator, sort_key_container);
        return;
    }

    serializeToPosImpl<true, ensure_unique>(pos, start, length, collator, sort_key_container);
}

void ColumnArray::serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const
{
if (has_null)
serializeToPosImpl<true>(pos, start, length);
serializeToPosImpl<true, false>(pos, start, length, nullptr, nullptr);
else
serializeToPosImpl<false>(pos, start, length);
serializeToPosImpl<false, false>(pos, start, length, nullptr, nullptr);
}

template <bool has_null>
void ColumnArray::serializeToPosImpl(PaddedPODArray<char *> & pos, size_t start, size_t length) const
template <bool has_null, bool ensure_unique>
void ColumnArray::serializeToPosImpl(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const
{
RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size());
RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size());
Expand All @@ -267,10 +303,37 @@ void ColumnArray::serializeToPosImpl(PaddedPODArray<char *> & pos, size_t start,
pos[i] += sizeof(UInt32);
}

getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets());
if constexpr (ensure_unique)
getData().serializeToPosUniqueForColumnArray(
pos,
start,
length,
has_null,
getOffsets(),
collator,
sort_key_container);
else
getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets());
}

void ColumnArray::deserializeAndInsertFromPos(PaddedPODArray<char *> & pos, bool use_nt_align_buffer)
/// "Unique" entry point for deserialization: forwards to the shared
/// implementation with its `ensure_unique` template flag set, passing the
/// caller's collator through unchanged.
void ColumnArray::deserializeAndInsertFromPosUnique(
    PaddedPODArray<const char *> & pos,
    bool use_nt_align_buffer,
    const TiDB::TiDBCollatorPtr & collator)
{
    constexpr bool ensure_unique = true;
    deserializeAndInsertFromPosImpl<ensure_unique>(pos, use_nt_align_buffer, collator);
}

/// Plain (non-"Unique") entry point for deserialization: forwards to the
/// shared implementation with its `ensure_unique` template flag cleared and a
/// null collator.
void ColumnArray::deserializeAndInsertFromPos(PaddedPODArray<const char *> & pos, bool use_nt_align_buffer)
{
    constexpr bool ensure_unique = false;
    deserializeAndInsertFromPosImpl<ensure_unique>(pos, use_nt_align_buffer, nullptr);
}

template <bool ensure_unique>
void ColumnArray::deserializeAndInsertFromPosImpl(
PaddedPODArray<const char *> & pos,
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator)
{
auto & offsets = getOffsets();
size_t prev_size = offsets.size();
Expand All @@ -285,7 +348,10 @@ void ColumnArray::deserializeAndInsertFromPos(PaddedPODArray<char *> & pos, bool
pos[i] += sizeof(UInt32);
}

getData().deserializeAndInsertFromPosForColumnArray(pos, offsets, use_nt_align_buffer);
if constexpr (ensure_unique)
getData().deserializeAndInsertFromPosUniqueForColumnArray(pos, offsets, use_nt_align_buffer, collator);
else
getData().deserializeAndInsertFromPosForColumnArray(pos, offsets, use_nt_align_buffer);
}

void ColumnArray::flushNTAlignBuffer()
Expand Down
70 changes: 66 additions & 4 deletions dbms/src/Columns/ColumnArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,23 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>

ColumnArray(const ColumnArray &) = default;

/// Shared body of countSerializeByteSizeUnique (ensure_unique = true) and
/// countSerializeByteSize (ensure_unique = false, called with a null collator).
/// The flag selects which *ForColumnArray counting method of the nested data column is invoked.
template <bool ensure_unique>
void countSerializeByteSizeImpl(PaddedPODArray<size_t> & byte_size, const TiDB::TiDBCollatorPtr & collator) const;

/// Shared body of serializeToPosUnique (ensure_unique = true) and serializeToPos
/// (ensure_unique = false, called with null collator and null sort_key_container).
/// has_null is the caller's runtime has_null flag lifted to a template argument.
template <bool has_null, bool ensure_unique>
void serializeToPosImpl(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const;

/// Shared body of deserializeAndInsertFromPosUnique (ensure_unique = true) and
/// deserializeAndInsertFromPos (ensure_unique = false, called with a null collator).
template <bool ensure_unique>
void deserializeAndInsertFromPosImpl(
PaddedPODArray<const char *> & pos,
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator);

public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
Expand Down Expand Up @@ -82,7 +99,19 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
String &) const override;
const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override;

/// Collator-aware variant; implemented via countSerializeByteSizeImpl<true>.
void countSerializeByteSizeUnique(PaddedPODArray<size_t> & byte_size, const TiDB::TiDBCollatorPtr & collator)
const override;
/// Plain variant; implemented via countSerializeByteSizeImpl<false> with a null collator.
void countSerializeByteSize(PaddedPODArray<size_t> & byte_size) const override;

/** Not supported by ColumnArray itself.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void countSerializeByteSizeUniqueForColumnArray(
    PaddedPODArray<size_t> & /* byte_size */,
    const IColumn::Offsets & /* array_offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */) const override
{
    const auto message = "Method countSerializeByteSizeUniqueForColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void countSerializeByteSizeForColumnArray(
PaddedPODArray<size_t> & /* byte_size */,
const IColumn::Offsets & /* array_offsets */) const override
Expand All @@ -92,10 +121,28 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
ErrorCodes::NOT_IMPLEMENTED);
}

/// Collator-aware variant; dispatches on has_null to serializeToPosImpl<has_null, true>.
void serializeToPosUnique(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
bool has_null,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const override;
/// Plain variant; dispatches on has_null to serializeToPosImpl<has_null, false>
/// with null collator and null sort_key_container.
void serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const override;
template <bool has_null>
void serializeToPosImpl(PaddedPODArray<char *> & pos, size_t start, size_t length) const;

/** Not supported by ColumnArray itself.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void serializeToPosUniqueForColumnArray(
    PaddedPODArray<char *> & /* pos */,
    size_t /* start */,
    size_t /* length */,
    bool /* has_null */,
    const IColumn::Offsets & /* array_offsets */,
    const TiDB::TiDBCollatorPtr & /* collator */,
    String * /* sort_key_container */) const override
{
    const auto message = "Method serializeToPosUniqueForColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void serializeToPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
Expand All @@ -108,9 +155,24 @@ class ColumnArray final : public COWPtrHelper<IColumn, ColumnArray>
ErrorCodes::NOT_IMPLEMENTED);
}

void deserializeAndInsertFromPos(PaddedPODArray<char *> & pos, bool use_nt_align_buffer) override;
/// Collator-aware variant; implemented via deserializeAndInsertFromPosImpl<true>.
void deserializeAndInsertFromPosUnique(
PaddedPODArray<const char *> & pos,
bool use_nt_align_buffer,
const TiDB::TiDBCollatorPtr & collator) override;
/// Plain variant; implemented via deserializeAndInsertFromPosImpl<false> with a null collator.
void deserializeAndInsertFromPos(PaddedPODArray<const char *> & pos, bool use_nt_align_buffer) override;

/** Not supported by ColumnArray itself.
  * All arguments are ignored; the call unconditionally throws Exception with
  * ErrorCodes::NOT_IMPLEMENTED. The message is the method name followed by getName().
  */
void deserializeAndInsertFromPosUniqueForColumnArray(
    PaddedPODArray<const char *> & /* pos */,
    const IColumn::Offsets & /* array_offsets */,
    bool /* use_nt_align_buffer */,
    const TiDB::TiDBCollatorPtr & /* collator */) override
{
    const auto message
        = "Method deserializeAndInsertFromPosUniqueForColumnArray is not supported for " + getName();
    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
}
void deserializeAndInsertFromPosForColumnArray(
PaddedPODArray<char *> & /* pos */,
PaddedPODArray<const char *> & /* pos */,
guo-shaoge marked this conversation as resolved.
Show resolved Hide resolved
const IColumn::Offsets & /* array_offsets */,
bool /* use_nt_align_buffer */) override
{
Expand Down
Loading