This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 62f15712030 branch-4.1: [refactor](storage type) remove deepcopy from 
typeinfo #61856 (#61913)
62f15712030 is described below

commit 62f15712030e750911cdc75041e264a0c67ade28
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 31 12:06:45 2026 +0800

    branch-4.1: [refactor](storage type) remove deepcopy from typeinfo #61856 
(#61913)
    
    Cherry-picked from #61856
    
    Co-authored-by: yiguolei <[email protected]>
---
 .../bloom_filter/bloom_filter_index_writer.cpp     |  10 +-
 .../index/bloom_filter/bloom_filter_index_writer.h |   9 +-
 be/src/storage/index/indexed_column_writer.cpp     |  13 +--
 be/src/storage/index/indexed_column_writer.h       |   9 +-
 be/src/storage/types.h                             | 101 ---------------------
 be/test/storage/storage_types_test.cpp             |  23 +----
 6 files changed, 19 insertions(+), 146 deletions(-)

diff --git a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp 
b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
index f87eebfb8bf..f169334ef02 100644
--- a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
+++ b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
@@ -179,7 +179,13 @@ Status 
PrimaryKeyBloomFilterIndexWriterImpl::add_values(const void* values, size
     const auto* v = (const Slice*)values;
     for (int i = 0; i < count; ++i) {
         Slice new_value;
-        RETURN_IF_CATCH_EXCEPTION(_type_info->deep_copy(&new_value, v, 
_arena));
+        new_value.size = v->size;
+        if (v->size > 0) {
+            new_value.data = _arena.alloc(v->size);
+            memcpy(new_value.data, v->data, v->size);
+        } else {
+            new_value.data = nullptr;
+        }
         _values.push_back(new_value);
         ++v;
     }
@@ -374,7 +380,7 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::create(const 
BloomFilterOptions& bf
     case FieldType::OLAP_FIELD_TYPE_CHAR:
     case FieldType::OLAP_FIELD_TYPE_VARCHAR:
     case FieldType::OLAP_FIELD_TYPE_STRING:
-        *res = 
std::make_unique<PrimaryKeyBloomFilterIndexWriterImpl>(bf_options, typeinfo);
+        *res = 
std::make_unique<PrimaryKeyBloomFilterIndexWriterImpl>(bf_options);
         break;
     default:
         return Status::NotSupported("unsupported type for primary key bloom 
filter index:{}",
diff --git a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h 
b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
index 07525621a93..0d4aecee850 100644
--- a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
+++ b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
@@ -69,12 +69,8 @@ private:
 // `BloomFilterIndexWriterImpl`, so vector can be used to accelerate.
 class PrimaryKeyBloomFilterIndexWriterImpl : public BloomFilterIndexWriter {
 public:
-    explicit PrimaryKeyBloomFilterIndexWriterImpl(const BloomFilterOptions& 
bf_options,
-                                                  const TypeInfo* type_info)
-            : _bf_options(bf_options),
-              _type_info(type_info),
-              _has_null(false),
-              _bf_buffer_size(0) {}
+    explicit PrimaryKeyBloomFilterIndexWriterImpl(const BloomFilterOptions& 
bf_options)
+            : _bf_options(bf_options), _has_null(false), _bf_buffer_size(0) {}
 
     ~PrimaryKeyBloomFilterIndexWriterImpl() override {
         for (auto& bf : _bfs) {
@@ -101,7 +97,6 @@ public:
 
 private:
     BloomFilterOptions _bf_options;
-    const TypeInfo* _type_info = nullptr;
     Arena _arena;
     bool _has_null;
     uint64_t _bf_buffer_size;
diff --git a/be/src/storage/index/indexed_column_writer.cpp 
b/be/src/storage/index/indexed_column_writer.cpp
index e57390954b9..92816460221 100644
--- a/be/src/storage/index/indexed_column_writer.cpp
+++ b/be/src/storage/index/indexed_column_writer.cpp
@@ -49,9 +49,7 @@ IndexedColumnWriter::IndexedColumnWriter(const 
IndexedColumnWriterOptions& optio
           _num_data_pages(0),
           _disk_size(0),
           _value_key_coder(nullptr),
-          _compress_codec(nullptr) {
-    _first_value.resize(_type_info->size());
-}
+          _compress_codec(nullptr) {}
 
 IndexedColumnWriter::~IndexedColumnWriter() = default;
 
@@ -86,8 +84,9 @@ Status IndexedColumnWriter::init() {
 
 Status IndexedColumnWriter::add(const void* value) {
     if (_options.write_value_index && _data_page_builder->count() == 0) {
-        // remember page's first value because it's used to build value index
-        _type_info->deep_copy(_first_value.data(), value, _arena);
+        // remember page's first value encoded key because it's used to build 
value index
+        _first_value_string.clear();
+        _value_key_coder->full_encode_ascending(value, &_first_value_string);
     }
     size_t num_to_write = 1;
     RETURN_IF_ERROR(
@@ -144,10 +143,8 @@ Status 
IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
     }
 
     if (_options.write_value_index) {
-        std::string key;
-        _value_key_coder->full_encode_ascending(_first_value.data(), &key);
         // TODO short separate key optimize
-        _value_index_builder->add(key, _last_data_page);
+        _value_index_builder->add(_first_value_string, _last_data_page);
         // TODO record last key in short separate key optimize
     }
     return Status::OK();
diff --git a/be/src/storage/index/indexed_column_writer.h 
b/be/src/storage/index/indexed_column_writer.h
index 14ee127dc0f..d201cf8fc89 100644
--- a/be/src/storage/index/indexed_column_writer.h
+++ b/be/src/storage/index/indexed_column_writer.h
@@ -24,12 +24,11 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
+#include <string>
 
 #include "common/status.h"
-#include "core/arena.h"
 #include "storage/segment/common.h"
 #include "storage/segment/page_pointer.h"
-#include "util/faststring.h"
 
 namespace doris {
 
@@ -96,14 +95,12 @@ private:
     IndexedColumnWriterOptions _options;
     const TypeInfo* _type_info = nullptr;
     io::FileWriter* _file_writer = nullptr;
-    // only used for `_first_value`
-    Arena _arena;
 
     ordinal_t _num_values;
     uint32_t _num_data_pages;
     uint64_t _disk_size;
-    // remember the first value in current page
-    faststring _first_value;
+    // remember the encoded first value key in current page
+    std::string _first_value_string;
     PagePointer _last_data_page;
 
     // the following members are initialized in init()
diff --git a/be/src/storage/types.h b/be/src/storage/types.h
index 9fa09299687..3e396953086 100644
--- a/be/src/storage/types.h
+++ b/be/src/storage/types.h
@@ -33,7 +33,6 @@
 #include "common/config.h"
 #include "common/consts.h"
 #include "common/status.h"
-#include "core/arena.h"
 #include "core/decimal12.h"
 #include "core/extended_types.h"
 #include "core/packed_int128.h"
@@ -71,8 +70,6 @@ public:
     virtual ~TypeInfo() = default;
     virtual int cmp(const void* left, const void* right) const = 0;
 
-    virtual void deep_copy(void* dest, const void* src, Arena& arena) const = 
0;
-
     virtual void set_to_max(void* buf) const = 0;
     virtual void set_to_min(void* buf) const = 0;
 
@@ -85,10 +82,6 @@ class ScalarTypeInfo : public TypeInfo {
 public:
     int cmp(const void* left, const void* right) const override { return 
_cmp(left, right); }
 
-    void deep_copy(void* dest, const void* src, Arena& arena) const override {
-        _deep_copy(dest, src, arena);
-    }
-
     void set_to_max(void* buf) const override { _set_to_max(buf); }
     void set_to_min(void* buf) const override { _set_to_min(buf); }
     size_t size() const override { return _size; }
@@ -98,7 +91,6 @@ public:
     template <typename TypeTraitsClass>
     ScalarTypeInfo(TypeTraitsClass t)
             : _cmp(TypeTraitsClass::cmp),
-              _deep_copy(TypeTraitsClass::deep_copy),
               _set_to_max(TypeTraitsClass::set_to_max),
               _set_to_min(TypeTraitsClass::set_to_min),
               _size(TypeTraitsClass::size),
@@ -107,8 +99,6 @@ public:
 private:
     int (*_cmp)(const void* left, const void* right);
 
-    void (*_deep_copy)(void* dest, const void* src, Arena& arena);
-
     void (*_set_to_max)(void* buf);
     void (*_set_to_min)(void* buf);
 
@@ -169,39 +159,6 @@ public:
         }
     }
 
-    void deep_copy(void* dest, const void* src, Arena& arena) const override {
-        auto dest_value = reinterpret_cast<CollectionValue*>(dest);
-        auto src_value = reinterpret_cast<const CollectionValue*>(src);
-
-        if (src_value->length() == 0) {
-            new (dest_value) CollectionValue(src_value->length());
-            return;
-        }
-
-        dest_value->set_length(src_value->length());
-
-        size_t item_size = src_value->length() * _item_size;
-        size_t nulls_size = src_value->has_null() ? src_value->length() : 0;
-        dest_value->set_data(arena.alloc(item_size + nulls_size));
-        dest_value->set_has_null(src_value->has_null());
-        dest_value->set_null_signs(src_value->has_null()
-                                           ? 
reinterpret_cast<bool*>(dest_value->mutable_data()) +
-                                                     item_size
-                                           : nullptr);
-
-        // copy null_signs
-        if (src_value->has_null()) {
-            dest_value->copy_null_signs(src_value);
-        }
-
-        // copy item
-        for (uint32_t i = 0; i < src_value->length(); ++i) {
-            if (dest_value->is_null_at(i)) continue;
-            _item_type_info->deep_copy((uint8_t*)(dest_value->mutable_data()) 
+ i * _item_size,
-                                       (uint8_t*)(src_value->data()) + i * 
_item_size, arena);
-        }
-    }
-
     void set_to_max(void* buf) const override {
         DCHECK(false) << "set_to_max of list is not implemented.";
     }
@@ -253,8 +210,6 @@ public:
         }
     }
 
-    void deep_copy(void* dest, const void* src, Arena& arena) const override { 
DCHECK(false); }
-
     void set_to_max(void* buf) const override {
         DCHECK(false) << "set_to_max of list is not implemented.";
     }
@@ -328,44 +283,6 @@ public:
         }
     }
 
-    void deep_copy(void* dest, const void* src, Arena& arena) const override {
-        auto dest_value = reinterpret_cast<StructValue*>(dest);
-        auto src_value = reinterpret_cast<const StructValue*>(src);
-
-        if (src_value->size() == 0) {
-            new (dest_value) StructValue(src_value->size());
-            return;
-        }
-
-        dest_value->set_size(src_value->size());
-        dest_value->set_has_null(src_value->has_null());
-
-        size_t allocate_size = src_value->size() * 
sizeof(*src_value->values());
-        // allocate memory for children value
-        for (uint32_t i = 0; i < src_value->size(); ++i) {
-            if (src_value->is_null_at(i)) continue;
-            allocate_size += _type_infos[i]->size();
-        }
-
-        dest_value->set_values((void**)arena.alloc(allocate_size));
-        auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
-        ptr += dest_value->size() * sizeof(*dest_value->values());
-
-        for (uint32_t i = 0; i < src_value->size(); ++i) {
-            dest_value->set_child_value(nullptr, i);
-            if (src_value->is_null_at(i)) continue;
-            dest_value->set_child_value(ptr, i);
-            ptr += _type_infos[i]->size();
-        }
-
-        // copy children value
-        for (uint32_t i = 0; i < src_value->size(); ++i) {
-            if (src_value->is_null_at(i)) continue;
-            _type_infos[i]->deep_copy(dest_value->mutable_child_value(i), 
src_value->child_value(i),
-                                      arena);
-        }
-    }
-
     void set_to_max(void* buf) const override {
         DCHECK(false) << "set_to_max of list is not implemented.";
     }
@@ -594,10 +511,6 @@ struct BaseFieldTypeTraits : public 
CppTypeTraits<field_type> {
         }
     }
 
-    static inline void deep_copy(void* dest, const void* src, Arena& arena) {
-        memcpy(dest, src, sizeof(CppType));
-    }
-
     static inline void set_to_max(void* buf) {
         set_cpp_type_value(buf, type_limit<CppType>::max());
     }
@@ -634,12 +547,6 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_BOOL>
 template <>
 struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_LARGEINT>
         : public NumericFieldTypeTraits<FieldType::OLAP_FIELD_TYPE_LARGEINT, 
true> {
-    // GCC7.3 will generate movaps instruction, which will lead to SEGV when 
buf is
-    // not aligned to 16 byte
-    static void deep_copy(void* dest, const void* src, Arena& arena) {
-        *reinterpret_cast<PackedInt128*>(dest) = *reinterpret_cast<const 
PackedInt128*>(src);
-    }
-
     static void set_to_max(void* buf) {
         *reinterpret_cast<PackedInt128*>(buf) = ~((int128_t)(1) << 127);
     }
@@ -757,14 +664,6 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_CHAR>
         auto r_slice = reinterpret_cast<const Slice*>(right);
         return l_slice->compare(*r_slice);
     }
-    static void deep_copy(void* dest, const void* src, Arena& arena) {
-        auto l_slice = reinterpret_cast<Slice*>(dest);
-        auto r_slice = reinterpret_cast<const Slice*>(src);
-        l_slice->data = arena.alloc(r_slice->size);
-        memcpy(l_slice->data, r_slice->data, r_slice->size);
-        l_slice->size = r_slice->size;
-    }
-
     // Using field.set_to_max to set varchar/char,not here.
     static void (*set_to_max)(void*);
 
diff --git a/be/test/storage/storage_types_test.cpp 
b/be/test/storage/storage_types_test.cpp
index 131f5a90021..b3ba23a51a0 100644
--- a/be/test/storage/storage_types_test.cpp
+++ b/be/test/storage/storage_types_test.cpp
@@ -21,7 +21,6 @@
 
 #include <memory>
 
-#include "core/arena.h"
 #include "core/decimal12.h"
 #include "core/uint24.h"
 #include "gtest/gtest_pred_impl.h"
@@ -46,12 +45,6 @@ void common_test(typename TypeTraits<field_type>::CppType 
src_val) {
 
     EXPECT_EQ(field_type, type->type());
     EXPECT_EQ(sizeof(src_val), type->size());
-    {
-        typename TypeTraits<field_type>::CppType dst_val;
-        Arena pool;
-        type->deep_copy((char*)&dst_val, (char*)&src_val, pool);
-        EXPECT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
-    }
     // test min
     {
         typename TypeTraits<field_type>::CppType dst_val;
@@ -76,13 +69,6 @@ void test_char(Slice src_val) {
 
     EXPECT_EQ(field->type(), fieldType);
     EXPECT_EQ(sizeof(src_val), type->size());
-    {
-        char buf[64];
-        Slice dst_val(buf, sizeof(buf));
-        Arena pool;
-        type->deep_copy((char*)&dst_val, (char*)&src_val, pool);
-        EXPECT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
-    }
     // test min
     {
         char buf[64];
@@ -112,7 +98,7 @@ void common_test<FieldType::OLAP_FIELD_TYPE_VARCHAR>(Slice 
src_val) {
     test_char<FieldType::OLAP_FIELD_TYPE_VARCHAR>(src_val);
 }
 
-TEST(TypesTest, copy_and_equal) {
+TEST(TypesTest, cmp_and_minmax) {
     common_test<FieldType::OLAP_FIELD_TYPE_BOOL>(true);
     common_test<FieldType::OLAP_FIELD_TYPE_TINYINT>(112);
     
common_test<FieldType::OLAP_FIELD_TYPE_SMALLINT>(static_cast<short>(54321));
@@ -152,13 +138,6 @@ void common_test_array(CollectionValue src_val) {
     auto array_type = get_type_info(&list_column);
     ASSERT_EQ(item_type,
               dynamic_cast<const 
ArrayTypeInfo*>(array_type.get())->item_type_info()->type());
-
-    { // test deep copy
-        CollectionValue dst_val;
-        Arena pool;
-        array_type->deep_copy((char*)&dst_val, (char*)&src_val, pool);
-        EXPECT_EQ(0, array_type->cmp((char*)&src_val, (char*)&dst_val));
-    }
 }
 
 TEST(ArrayTypeTest, copy_and_equal) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to