This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch tpc_preview6
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/tpc_preview6 by this push:
new 9d59bf3f329 use streamvbyte_encode in DataTypeFixedLengthObject::serialize
9d59bf3f329 is described below
commit 9d59bf3f3296cdd64a156948831005773064477f
Author: BiteTheDDDDt <[email protected]>
AuthorDate: Tue Feb 3 20:07:46 2026 +0800
use streamvbyte_encode in DataTypeFixedLengthObject::serialize
---
.../aggregate_functions/aggregate_function_count.h | 4 +
.../data_types/data_type_fixed_length_object.cpp | 94 +++++++++++-----------
2 files changed, 51 insertions(+), 47 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h
index f8a7a4b9b8e..08f753da91e 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_count.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_count.h
@@ -61,6 +61,8 @@ public:
DataTypePtr get_return_type() const override { return
std::make_shared<DataTypeInt64>(); }
+ bool is_trivial() const override { return true; }
+
void add(AggregateDataPtr __restrict place, const IColumn**, ssize_t,
Arena&) const override {
++data(place).count;
}
@@ -191,6 +193,8 @@ public:
DataTypePtr get_return_type() const override { return
std::make_shared<DataTypeInt64>(); }
+ bool is_trivial() const override { return true; }
+
void add(AggregateDataPtr __restrict place, const IColumn** columns,
ssize_t row_num,
Arena&) const override {
data(place).count +=
diff --git a/be/src/vec/data_types/data_type_fixed_length_object.cpp b/be/src/vec/data_types/data_type_fixed_length_object.cpp
index ab80bf72f9f..ae0d1b2ba0d 100644
--- a/be/src/vec/data_types/data_type_fixed_length_object.cpp
+++ b/be/src/vec/data_types/data_type_fixed_length_object.cpp
@@ -18,6 +18,7 @@
#include "vec/data_types/data_type_fixed_length_object.h"
#include <glog/logging.h>
+#include <streamvbyte.h>
#include <string.h>
#include <ostream>
@@ -33,22 +34,10 @@ namespace doris::vectorized {
char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf,
int be_exec_version) const {
- // const flag
- bool is_const_column = is_column_const(column);
- unaligned_store<bool>(buf, is_const_column);
- buf += sizeof(bool);
-
- // row num
- const auto row_num = column.size();
- unaligned_store<size_t>(buf, row_num);
- buf += sizeof(size_t);
- auto real_need_copy_num = is_const_column ? 1 : row_num;
+ const auto* data_column = &column;
+ size_t real_need_copy_num = 0;
+ buf = serialize_const_flag_and_row_num(&data_column, buf,
&real_need_copy_num);
- const IColumn* data_column = &column;
- if (is_const_column) {
- const auto& const_column = assert_cast<const ColumnConst&>(column);
- data_column = &(const_column.get_data_column());
- }
const auto& src_col = assert_cast<const ColumnType&>(*data_column);
DCHECK(src_col.item_size() > 0)
<< "[serialize]item size of DataTypeFixedLengthObject should be
greater than 0";
@@ -56,39 +45,46 @@ char* DataTypeFixedLengthObject::serialize(const IColumn&
column, char* buf,
// item size
unaligned_store<size_t>(buf, src_col.item_size());
buf += sizeof(size_t);
- // column data
+
+ auto mem_size = real_need_copy_num * src_col.item_size();
const auto* origin_data = src_col.get_data().data();
- memcpy(buf, origin_data, real_need_copy_num * src_col.item_size());
- buf += real_need_copy_num * src_col.item_size();
- return buf;
+ // column data
+ if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+ memcpy(buf, origin_data, mem_size);
+ return buf + mem_size;
+ } else {
+ // Throw exception if mem_size is large than UINT32_MAX
+ auto encode_size = streamvbyte_encode(reinterpret_cast<const
uint32_t*>(origin_data),
+
cast_set<UInt32>(upper_int32(mem_size)),
+ (uint8_t*)(buf +
sizeof(size_t)));
+ unaligned_store<size_t>(buf, encode_size);
+ buf += sizeof(size_t);
+ return buf + encode_size;
+ }
}
const char* DataTypeFixedLengthObject::deserialize(const char* buf,
MutableColumnPtr* column,
int be_exec_version) const {
- //const flag
- bool is_const_column = unaligned_load<bool>(buf);
- buf += sizeof(bool);
- //row num
- size_t row_num = unaligned_load<size_t>(buf);
- buf += sizeof(size_t);
- //item size
- size_t item_size = unaligned_load<size_t>(buf);
- buf += sizeof(size_t);
+ size_t real_have_saved_num = 0;
+ buf = deserialize_const_flag_and_row_num(buf, column,
&real_have_saved_num);
- DCHECK(item_size > 0)
- << "[deserialize]item size of DataTypeFixedLengthObject should be
greater than 0";
-
- auto& dst_col = static_cast<ColumnType&>(*(column->get()));
+ auto& dst_col = assert_cast<ColumnType&>(*(column->get()));
+ auto item_size = unaligned_load<size_t>(buf);
+ buf += sizeof(size_t);
dst_col.set_item_size(item_size);
- // column data
- auto real_copy_num = is_const_column ? 1 : row_num;
- dst_col.resize(real_copy_num);
- memcpy(dst_col.get_data().data(), buf, real_copy_num * item_size);
- buf += real_copy_num * item_size;
- if (is_const_column) {
- auto const_column = ColumnConst::create((*column)->get_ptr(), row_num);
- *column = const_column->get_ptr();
+
+ auto mem_size = real_have_saved_num * item_size;
+ dst_col.resize(real_have_saved_num);
+ if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+ memcpy(dst_col.get_data().data(), buf, mem_size);
+ buf = buf + mem_size;
+ } else {
+ auto encode_size = unaligned_load<size_t>(buf);
+ buf += sizeof(size_t);
+ streamvbyte_decode((const uint8_t*)buf,
(uint32_t*)(dst_col.get_data().data()),
+ cast_set<UInt32>(upper_int32(mem_size)));
+ buf = buf + encode_size;
}
return buf;
}
@@ -97,14 +93,18 @@ const char* DataTypeFixedLengthObject::deserialize(const
char* buf, MutableColum
// data : item data1 | item data2...
int64_t DataTypeFixedLengthObject::get_uncompressed_serialized_bytes(const
IColumn& column,
int
be_exec_version) const {
- auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t);
- const IColumn* data_column = &column;
- if (is_column_const(column)) {
- const auto& const_column = assert_cast<const ColumnConst&>(column);
- data_column = &(const_column.get_data_column());
+ auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t) +
sizeof(size_t);
+ auto real_need_copy_num = is_column_const(column) ? 1 : column.size();
+ const auto& src_col = assert_cast<const ColumnType&>(column);
+ auto mem_size = src_col.item_size() * real_need_copy_num;
+ if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+ return size + mem_size;
+ } else {
+ // Throw exception if mem_size is large than UINT32_MAX
+ return size + sizeof(size_t) +
+ std::max(mem_size,
+
streamvbyte_max_compressedbytes(cast_set<UInt32>(upper_int32(mem_size))));
}
- const auto& src_col = assert_cast<const ColumnType&>(*data_column);
- return static_cast<const ColumnType&>(src_col).byte_size() + size;
}
MutableColumnPtr DataTypeFixedLengthObject::create_column() const {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]