This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch tpc_preview6
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/tpc_preview6 by this push:
     new 9d59bf3f329 use streamvbyte_encode in DataTypeFixedLengthObject::serialize
9d59bf3f329 is described below

commit 9d59bf3f3296cdd64a156948831005773064477f
Author: BiteTheDDDDt <[email protected]>
AuthorDate: Tue Feb 3 20:07:46 2026 +0800

    use streamvbyte_encode in DataTypeFixedLengthObject::serialize
---
 .../aggregate_functions/aggregate_function_count.h |  4 +
 .../data_types/data_type_fixed_length_object.cpp   | 94 +++++++++++-----------
 2 files changed, 51 insertions(+), 47 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h
index f8a7a4b9b8e..08f753da91e 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_count.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_count.h
@@ -61,6 +61,8 @@ public:
 
     DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
 
+    bool is_trivial() const override { return true; }
+
     void add(AggregateDataPtr __restrict place, const IColumn**, ssize_t, Arena&) const override {
         ++data(place).count;
     }
@@ -191,6 +193,8 @@ public:
 
     DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
 
+    bool is_trivial() const override { return true; }
+
     void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num,
              Arena&) const override {
         data(place).count +=
diff --git a/be/src/vec/data_types/data_type_fixed_length_object.cpp b/be/src/vec/data_types/data_type_fixed_length_object.cpp
index ab80bf72f9f..ae0d1b2ba0d 100644
--- a/be/src/vec/data_types/data_type_fixed_length_object.cpp
+++ b/be/src/vec/data_types/data_type_fixed_length_object.cpp
@@ -18,6 +18,7 @@
 #include "vec/data_types/data_type_fixed_length_object.h"
 
 #include <glog/logging.h>
+#include <streamvbyte.h>
 #include <string.h>
 
 #include <ostream>
@@ -33,22 +34,10 @@ namespace doris::vectorized {
 
 char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf,
                                            int be_exec_version) const {
-    // const flag
-    bool is_const_column = is_column_const(column);
-    unaligned_store<bool>(buf, is_const_column);
-    buf += sizeof(bool);
-
-    // row num
-    const auto row_num = column.size();
-    unaligned_store<size_t>(buf, row_num);
-    buf += sizeof(size_t);
-    auto real_need_copy_num = is_const_column ? 1 : row_num;
+    const auto* data_column = &column;
+    size_t real_need_copy_num = 0;
+    buf = serialize_const_flag_and_row_num(&data_column, buf, &real_need_copy_num);
 
-    const IColumn* data_column = &column;
-    if (is_const_column) {
-        const auto& const_column = assert_cast<const ColumnConst&>(column);
-        data_column = &(const_column.get_data_column());
-    }
     const auto& src_col = assert_cast<const ColumnType&>(*data_column);
     DCHECK(src_col.item_size() > 0)
             << "[serialize]item size of DataTypeFixedLengthObject should be greater than 0";
@@ -56,39 +45,46 @@ char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf,
     // item size
     unaligned_store<size_t>(buf, src_col.item_size());
     buf += sizeof(size_t);
-    // column data
+
+    auto mem_size = real_need_copy_num * src_col.item_size();
     const auto* origin_data = src_col.get_data().data();
-    memcpy(buf, origin_data, real_need_copy_num * src_col.item_size());
-    buf += real_need_copy_num * src_col.item_size();
 
-    return buf;
+    // column data
+    if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+        memcpy(buf, origin_data, mem_size);
+        return buf + mem_size;
+    } else {
+        // Throw exception if mem_size is larger than UINT32_MAX
+        auto encode_size = streamvbyte_encode(reinterpret_cast<const uint32_t*>(origin_data),
+                                              cast_set<UInt32>(upper_int32(mem_size)),
+                                              (uint8_t*)(buf + sizeof(size_t)));
+        unaligned_store<size_t>(buf, encode_size);
+        buf += sizeof(size_t);
+        return buf + encode_size;
+    }
 }
 
 const char* DataTypeFixedLengthObject::deserialize(const char* buf, MutableColumnPtr* column,
                                                    int be_exec_version) const {
-    //const flag
-    bool is_const_column = unaligned_load<bool>(buf);
-    buf += sizeof(bool);
-    //row num
-    size_t row_num = unaligned_load<size_t>(buf);
-    buf += sizeof(size_t);
-    //item size
-    size_t item_size = unaligned_load<size_t>(buf);
-    buf += sizeof(size_t);
+    size_t real_have_saved_num = 0;
+    buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num);
 
-    DCHECK(item_size > 0)
-            << "[deserialize]item size of DataTypeFixedLengthObject should be greater than 0";
-
-    auto& dst_col = static_cast<ColumnType&>(*(column->get()));
+    auto& dst_col = assert_cast<ColumnType&>(*(column->get()));
+    auto item_size = unaligned_load<size_t>(buf);
+    buf += sizeof(size_t);
     dst_col.set_item_size(item_size);
-    // column data
-    auto real_copy_num = is_const_column ? 1 : row_num;
-    dst_col.resize(real_copy_num);
-    memcpy(dst_col.get_data().data(), buf, real_copy_num * item_size);
-    buf += real_copy_num * item_size;
-    if (is_const_column) {
-        auto const_column = ColumnConst::create((*column)->get_ptr(), row_num);
-        *column = const_column->get_ptr();
+
+    auto mem_size = real_have_saved_num * item_size;
+    dst_col.resize(real_have_saved_num);
+    if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+        memcpy(dst_col.get_data().data(), buf, mem_size);
+        buf = buf + mem_size;
+    } else {
+        auto encode_size = unaligned_load<size_t>(buf);
+        buf += sizeof(size_t);
+        streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(dst_col.get_data().data()),
+                           cast_set<UInt32>(upper_int32(mem_size)));
+        buf = buf + encode_size;
     }
     return buf;
 }
@@ -97,14 +93,18 @@ const char* DataTypeFixedLengthObject::deserialize(const char* buf, MutableColum
 // data  : item data1 | item data2...
 int64_t DataTypeFixedLengthObject::get_uncompressed_serialized_bytes(const IColumn& column,
                                                                      int be_exec_version) const {
-    auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t);
-    const IColumn* data_column = &column;
-    if (is_column_const(column)) {
-        const auto& const_column = assert_cast<const ColumnConst&>(column);
-        data_column = &(const_column.get_data_column());
+    auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t) + sizeof(size_t);
+    auto real_need_copy_num = is_column_const(column) ? 1 : column.size();
+    const auto& src_col = assert_cast<const ColumnType&>(column);
+    auto mem_size = src_col.item_size() * real_need_copy_num;
+    if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+        return size + mem_size;
+    } else {
+        // Throw exception if mem_size is larger than UINT32_MAX
+        return size + sizeof(size_t) +
+               std::max(mem_size,
+                        streamvbyte_max_compressedbytes(cast_set<UInt32>(upper_int32(mem_size))));
     }
-    const auto& src_col = assert_cast<const ColumnType&>(*data_column);
-    return static_cast<const ColumnType&>(src_col).byte_size() + size;
 }
 
 MutableColumnPtr DataTypeFixedLengthObject::create_column() const {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to