This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 62f15712030 branch-4.1: [refactor](storage type) remove deepcopy from
typeinfo #61856 (#61913)
62f15712030 is described below
commit 62f15712030e750911cdc75041e264a0c67ade28
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 31 12:06:45 2026 +0800
branch-4.1: [refactor](storage type) remove deepcopy from typeinfo #61856
(#61913)
Cherry-picked from #61856
Co-authored-by: yiguolei <[email protected]>
---
.../bloom_filter/bloom_filter_index_writer.cpp | 10 +-
.../index/bloom_filter/bloom_filter_index_writer.h | 9 +-
be/src/storage/index/indexed_column_writer.cpp | 13 +--
be/src/storage/index/indexed_column_writer.h | 9 +-
be/src/storage/types.h | 101 ---------------------
be/test/storage/storage_types_test.cpp | 23 +----
6 files changed, 19 insertions(+), 146 deletions(-)
diff --git a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
index f87eebfb8bf..f169334ef02 100644
--- a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
+++ b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp
@@ -179,7 +179,13 @@ Status
PrimaryKeyBloomFilterIndexWriterImpl::add_values(const void* values, size
const auto* v = (const Slice*)values;
for (int i = 0; i < count; ++i) {
Slice new_value;
- RETURN_IF_CATCH_EXCEPTION(_type_info->deep_copy(&new_value, v,
_arena));
+ new_value.size = v->size;
+ if (v->size > 0) {
+ new_value.data = _arena.alloc(v->size);
+ memcpy(new_value.data, v->data, v->size);
+ } else {
+ new_value.data = nullptr;
+ }
_values.push_back(new_value);
++v;
}
@@ -374,7 +380,7 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::create(const
BloomFilterOptions& bf
case FieldType::OLAP_FIELD_TYPE_CHAR:
case FieldType::OLAP_FIELD_TYPE_VARCHAR:
case FieldType::OLAP_FIELD_TYPE_STRING:
- *res =
std::make_unique<PrimaryKeyBloomFilterIndexWriterImpl>(bf_options, typeinfo);
+ *res =
std::make_unique<PrimaryKeyBloomFilterIndexWriterImpl>(bf_options);
break;
default:
return Status::NotSupported("unsupported type for primary key bloom
filter index:{}",
diff --git a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
index 07525621a93..0d4aecee850 100644
--- a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
+++ b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.h
@@ -69,12 +69,8 @@ private:
// `BloomFilterIndexWriterImpl`, so vector can be used to accelerate.
class PrimaryKeyBloomFilterIndexWriterImpl : public BloomFilterIndexWriter {
public:
- explicit PrimaryKeyBloomFilterIndexWriterImpl(const BloomFilterOptions&
bf_options,
- const TypeInfo* type_info)
- : _bf_options(bf_options),
- _type_info(type_info),
- _has_null(false),
- _bf_buffer_size(0) {}
+ explicit PrimaryKeyBloomFilterIndexWriterImpl(const BloomFilterOptions&
bf_options)
+ : _bf_options(bf_options), _has_null(false), _bf_buffer_size(0) {}
~PrimaryKeyBloomFilterIndexWriterImpl() override {
for (auto& bf : _bfs) {
@@ -101,7 +97,6 @@ public:
private:
BloomFilterOptions _bf_options;
- const TypeInfo* _type_info = nullptr;
Arena _arena;
bool _has_null;
uint64_t _bf_buffer_size;
diff --git a/be/src/storage/index/indexed_column_writer.cpp
b/be/src/storage/index/indexed_column_writer.cpp
index e57390954b9..92816460221 100644
--- a/be/src/storage/index/indexed_column_writer.cpp
+++ b/be/src/storage/index/indexed_column_writer.cpp
@@ -49,9 +49,7 @@ IndexedColumnWriter::IndexedColumnWriter(const
IndexedColumnWriterOptions& optio
_num_data_pages(0),
_disk_size(0),
_value_key_coder(nullptr),
- _compress_codec(nullptr) {
- _first_value.resize(_type_info->size());
-}
+ _compress_codec(nullptr) {}
IndexedColumnWriter::~IndexedColumnWriter() = default;
@@ -86,8 +84,9 @@ Status IndexedColumnWriter::init() {
Status IndexedColumnWriter::add(const void* value) {
if (_options.write_value_index && _data_page_builder->count() == 0) {
- // remember page's first value because it's used to build value index
- _type_info->deep_copy(_first_value.data(), value, _arena);
+ // remember page's first value encoded key because it's used to build
value index
+ _first_value_string.clear();
+ _value_key_coder->full_encode_ascending(value, &_first_value_string);
}
size_t num_to_write = 1;
RETURN_IF_ERROR(
@@ -144,10 +143,8 @@ Status
IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
}
if (_options.write_value_index) {
- std::string key;
- _value_key_coder->full_encode_ascending(_first_value.data(), &key);
// TODO short separate key optimize
- _value_index_builder->add(key, _last_data_page);
+ _value_index_builder->add(_first_value_string, _last_data_page);
// TODO record last key in short separate key optimize
}
return Status::OK();
diff --git a/be/src/storage/index/indexed_column_writer.h
b/be/src/storage/index/indexed_column_writer.h
index 14ee127dc0f..d201cf8fc89 100644
--- a/be/src/storage/index/indexed_column_writer.h
+++ b/be/src/storage/index/indexed_column_writer.h
@@ -24,12 +24,11 @@
#include <cstddef>
#include <cstdint>
#include <memory>
+#include <string>
#include "common/status.h"
-#include "core/arena.h"
#include "storage/segment/common.h"
#include "storage/segment/page_pointer.h"
-#include "util/faststring.h"
namespace doris {
@@ -96,14 +95,12 @@ private:
IndexedColumnWriterOptions _options;
const TypeInfo* _type_info = nullptr;
io::FileWriter* _file_writer = nullptr;
- // only used for `_first_value`
- Arena _arena;
ordinal_t _num_values;
uint32_t _num_data_pages;
uint64_t _disk_size;
- // remember the first value in current page
- faststring _first_value;
+ // remember the encoded first value key in current page
+ std::string _first_value_string;
PagePointer _last_data_page;
// the following members are initialized in init()
diff --git a/be/src/storage/types.h b/be/src/storage/types.h
index 9fa09299687..3e396953086 100644
--- a/be/src/storage/types.h
+++ b/be/src/storage/types.h
@@ -33,7 +33,6 @@
#include "common/config.h"
#include "common/consts.h"
#include "common/status.h"
-#include "core/arena.h"
#include "core/decimal12.h"
#include "core/extended_types.h"
#include "core/packed_int128.h"
@@ -71,8 +70,6 @@ public:
virtual ~TypeInfo() = default;
virtual int cmp(const void* left, const void* right) const = 0;
- virtual void deep_copy(void* dest, const void* src, Arena& arena) const =
0;
-
virtual void set_to_max(void* buf) const = 0;
virtual void set_to_min(void* buf) const = 0;
@@ -85,10 +82,6 @@ class ScalarTypeInfo : public TypeInfo {
public:
int cmp(const void* left, const void* right) const override { return
_cmp(left, right); }
- void deep_copy(void* dest, const void* src, Arena& arena) const override {
- _deep_copy(dest, src, arena);
- }
-
void set_to_max(void* buf) const override { _set_to_max(buf); }
void set_to_min(void* buf) const override { _set_to_min(buf); }
size_t size() const override { return _size; }
@@ -98,7 +91,6 @@ public:
template <typename TypeTraitsClass>
ScalarTypeInfo(TypeTraitsClass t)
: _cmp(TypeTraitsClass::cmp),
- _deep_copy(TypeTraitsClass::deep_copy),
_set_to_max(TypeTraitsClass::set_to_max),
_set_to_min(TypeTraitsClass::set_to_min),
_size(TypeTraitsClass::size),
@@ -107,8 +99,6 @@ public:
private:
int (*_cmp)(const void* left, const void* right);
- void (*_deep_copy)(void* dest, const void* src, Arena& arena);
-
void (*_set_to_max)(void* buf);
void (*_set_to_min)(void* buf);
@@ -169,39 +159,6 @@ public:
}
}
- void deep_copy(void* dest, const void* src, Arena& arena) const override {
- auto dest_value = reinterpret_cast<CollectionValue*>(dest);
- auto src_value = reinterpret_cast<const CollectionValue*>(src);
-
- if (src_value->length() == 0) {
- new (dest_value) CollectionValue(src_value->length());
- return;
- }
-
- dest_value->set_length(src_value->length());
-
- size_t item_size = src_value->length() * _item_size;
- size_t nulls_size = src_value->has_null() ? src_value->length() : 0;
- dest_value->set_data(arena.alloc(item_size + nulls_size));
- dest_value->set_has_null(src_value->has_null());
- dest_value->set_null_signs(src_value->has_null()
- ?
reinterpret_cast<bool*>(dest_value->mutable_data()) +
- item_size
- : nullptr);
-
- // copy null_signs
- if (src_value->has_null()) {
- dest_value->copy_null_signs(src_value);
- }
-
- // copy item
- for (uint32_t i = 0; i < src_value->length(); ++i) {
- if (dest_value->is_null_at(i)) continue;
- _item_type_info->deep_copy((uint8_t*)(dest_value->mutable_data())
+ i * _item_size,
- (uint8_t*)(src_value->data()) + i *
_item_size, arena);
- }
- }
-
void set_to_max(void* buf) const override {
DCHECK(false) << "set_to_max of list is not implemented.";
}
@@ -253,8 +210,6 @@ public:
}
}
- void deep_copy(void* dest, const void* src, Arena& arena) const override {
DCHECK(false); }
-
void set_to_max(void* buf) const override {
DCHECK(false) << "set_to_max of list is not implemented.";
}
@@ -328,44 +283,6 @@ public:
}
}
- void deep_copy(void* dest, const void* src, Arena& arena) const override {
- auto dest_value = reinterpret_cast<StructValue*>(dest);
- auto src_value = reinterpret_cast<const StructValue*>(src);
-
- if (src_value->size() == 0) {
- new (dest_value) StructValue(src_value->size());
- return;
- }
-
- dest_value->set_size(src_value->size());
- dest_value->set_has_null(src_value->has_null());
-
- size_t allocate_size = src_value->size() *
sizeof(*src_value->values());
- // allocate memory for children value
- for (uint32_t i = 0; i < src_value->size(); ++i) {
- if (src_value->is_null_at(i)) continue;
- allocate_size += _type_infos[i]->size();
- }
-
- dest_value->set_values((void**)arena.alloc(allocate_size));
- auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
- ptr += dest_value->size() * sizeof(*dest_value->values());
-
- for (uint32_t i = 0; i < src_value->size(); ++i) {
- dest_value->set_child_value(nullptr, i);
- if (src_value->is_null_at(i)) continue;
- dest_value->set_child_value(ptr, i);
- ptr += _type_infos[i]->size();
- }
-
- // copy children value
- for (uint32_t i = 0; i < src_value->size(); ++i) {
- if (src_value->is_null_at(i)) continue;
- _type_infos[i]->deep_copy(dest_value->mutable_child_value(i),
src_value->child_value(i),
- arena);
- }
- }
-
void set_to_max(void* buf) const override {
DCHECK(false) << "set_to_max of list is not implemented.";
}
@@ -594,10 +511,6 @@ struct BaseFieldTypeTraits : public
CppTypeTraits<field_type> {
}
}
- static inline void deep_copy(void* dest, const void* src, Arena& arena) {
- memcpy(dest, src, sizeof(CppType));
- }
-
static inline void set_to_max(void* buf) {
set_cpp_type_value(buf, type_limit<CppType>::max());
}
@@ -634,12 +547,6 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_BOOL>
template <>
struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_LARGEINT>
: public NumericFieldTypeTraits<FieldType::OLAP_FIELD_TYPE_LARGEINT,
true> {
- // GCC7.3 will generate movaps instruction, which will lead to SEGV when
buf is
- // not aligned to 16 byte
- static void deep_copy(void* dest, const void* src, Arena& arena) {
- *reinterpret_cast<PackedInt128*>(dest) = *reinterpret_cast<const
PackedInt128*>(src);
- }
-
static void set_to_max(void* buf) {
*reinterpret_cast<PackedInt128*>(buf) = ~((int128_t)(1) << 127);
}
@@ -757,14 +664,6 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_CHAR>
auto r_slice = reinterpret_cast<const Slice*>(right);
return l_slice->compare(*r_slice);
}
- static void deep_copy(void* dest, const void* src, Arena& arena) {
- auto l_slice = reinterpret_cast<Slice*>(dest);
- auto r_slice = reinterpret_cast<const Slice*>(src);
- l_slice->data = arena.alloc(r_slice->size);
- memcpy(l_slice->data, r_slice->data, r_slice->size);
- l_slice->size = r_slice->size;
- }
-
// Using field.set_to_max to set varchar/char,not here.
static void (*set_to_max)(void*);
diff --git a/be/test/storage/storage_types_test.cpp
b/be/test/storage/storage_types_test.cpp
index 131f5a90021..b3ba23a51a0 100644
--- a/be/test/storage/storage_types_test.cpp
+++ b/be/test/storage/storage_types_test.cpp
@@ -21,7 +21,6 @@
#include <memory>
-#include "core/arena.h"
#include "core/decimal12.h"
#include "core/uint24.h"
#include "gtest/gtest_pred_impl.h"
@@ -46,12 +45,6 @@ void common_test(typename TypeTraits<field_type>::CppType
src_val) {
EXPECT_EQ(field_type, type->type());
EXPECT_EQ(sizeof(src_val), type->size());
- {
- typename TypeTraits<field_type>::CppType dst_val;
- Arena pool;
- type->deep_copy((char*)&dst_val, (char*)&src_val, pool);
- EXPECT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
- }
// test min
{
typename TypeTraits<field_type>::CppType dst_val;
@@ -76,13 +69,6 @@ void test_char(Slice src_val) {
EXPECT_EQ(field->type(), fieldType);
EXPECT_EQ(sizeof(src_val), type->size());
- {
- char buf[64];
- Slice dst_val(buf, sizeof(buf));
- Arena pool;
- type->deep_copy((char*)&dst_val, (char*)&src_val, pool);
- EXPECT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
- }
// test min
{
char buf[64];
@@ -112,7 +98,7 @@ void common_test<FieldType::OLAP_FIELD_TYPE_VARCHAR>(Slice
src_val) {
test_char<FieldType::OLAP_FIELD_TYPE_VARCHAR>(src_val);
}
-TEST(TypesTest, copy_and_equal) {
+TEST(TypesTest, cmp_and_minmax) {
common_test<FieldType::OLAP_FIELD_TYPE_BOOL>(true);
common_test<FieldType::OLAP_FIELD_TYPE_TINYINT>(112);
common_test<FieldType::OLAP_FIELD_TYPE_SMALLINT>(static_cast<short>(54321));
@@ -152,13 +138,6 @@ void common_test_array(CollectionValue src_val) {
auto array_type = get_type_info(&list_column);
ASSERT_EQ(item_type,
dynamic_cast<const
ArrayTypeInfo*>(array_type.get())->item_type_info()->type());
-
- { // test deep copy
- CollectionValue dst_val;
- Arena pool;
- array_type->deep_copy((char*)&dst_val, (char*)&src_val, pool);
- EXPECT_EQ(0, array_type->cmp((char*)&src_val, (char*)&dst_val));
- }
}
TEST(ArrayTypeTest, copy_and_equal) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]