This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 77bec724630 [BE](ut) add UT about bitmap/hll/quantile_state datatype
(#47845)
77bec724630 is described below
commit 77bec72463044c371e044e1ad5b5b5166650f85d
Author: zhangstar333 <[email protected]>
AuthorDate: Wed Feb 19 09:48:10 2025 +0800
[BE](ut) add UT about bitmap/hll/quantile_state datatype (#47845)
### What problem does this PR solve?
Problem Summary:
Add unit tests for the
bitmap/hll/quantile_state/agg_state/fixed_length_object data types.
---
be/src/vec/data_types/data_type.h | 6 +
be/test/vec/data_types/common_data_type_test.h | 17 +-
.../vec/data_types/data_type_agg_state_test.cpp | 257 +++++++++++++++++++++
be/test/vec/data_types/data_type_array_test.cpp | 22 +-
be/test/vec/data_types/data_type_bitmap_test.cpp | 218 +++++++++++++++++
.../data_type_fixed_length_object_test.cpp | 153 ++++++++++++
be/test/vec/data_types/data_type_hll_test.cpp | 216 +++++++++++++++++
be/test/vec/data_types/data_type_ip_test.cpp | 4 +-
.../data_types/data_type_quantile_state_test.cpp | 198 ++++++++++++++++
9 files changed, 1073 insertions(+), 18 deletions(-)
diff --git a/be/src/vec/data_types/data_type.h
b/be/src/vec/data_types/data_type.h
index 7f1ee0cd850..3cc32156d20 100644
--- a/be/src/vec/data_types/data_type.h
+++ b/be/src/vec/data_types/data_type.h
@@ -272,6 +272,7 @@ struct WhichDataType {
bool is_struct() const { return idx == TypeIndex::Struct; }
bool is_map() const { return idx == TypeIndex::Map; }
bool is_set() const { return idx == TypeIndex::Set; }
+ bool is_fixed_length_object() const { return idx ==
TypeIndex::FixedLengthObject; }
bool is_nothing() const { return idx == TypeIndex::Nothing; }
bool is_nullable() const { return idx == TypeIndex::Nullable; }
@@ -371,6 +372,11 @@ bool is_string_or_fixed_string(const T& data_type) {
return WhichDataType(data_type).is_string_or_fixed_string();
}
+template <typename T>
+bool is_fixed_length_object(const T& data_type) {
+ return WhichDataType(data_type).is_fixed_length_object();
+}
+
inline bool is_not_decimal_but_comparable_to_decimal(const DataTypePtr&
data_type) {
WhichDataType which(data_type);
return which.is_int() || which.is_uint();
diff --git a/be/test/vec/data_types/common_data_type_test.h
b/be/test/vec/data_types/common_data_type_test.h
index 643f8669fbe..0b85e3977c2 100644
--- a/be/test/vec/data_types/common_data_type_test.h
+++ b/be/test/vec/data_types/common_data_type_test.h
@@ -22,6 +22,8 @@
#include <fstream>
#include <iostream>
+#include "agent/be_exec_version_manager.h"
+#include "olap/schema.h"
#include "vec/columns/column.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
@@ -52,6 +54,10 @@ namespace doris::vectorized {
static bool gen_check_data_in_assert = true;
class CommonDataTypeTest : public ::testing::Test {
+public:
+ CommonDataTypeTest() = default;
+ void TestBody() override {}
+
protected:
// Helper function to load data from CSV, with index which splited by
spliter and load to columns
void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns&
columns,
@@ -164,7 +170,8 @@ public:
ASSERT_EQ(const_col->operator[](i),
default_const_col->operator[](i));
}
// get_uncompressed_serialized_bytes
- ASSERT_EQ(data_type->get_uncompressed_serialized_bytes(*column, 0),
+ ASSERT_EQ(data_type->get_uncompressed_serialized_bytes(
+ *column, BeExecVersionManager::get_newest_version()),
uncompressed_serialized_bytes);
}
@@ -249,7 +256,7 @@ public:
}
// nt be_exec_version, PBlock* pblock, size_t* uncompressed_bytes,
// size_t* compressed_bytes,
segment_v2::CompressionTypePB compression_type,
- size_t be_exec_version = 2;
+ size_t be_exec_version = BeExecVersionManager::get_newest_version();
auto pblock = std::make_unique<PBlock>();
size_t uncompressed_bytes = 0;
size_t compressed_bytes = 0;
@@ -262,9 +269,9 @@ public:
st = block_1->deserialize(*pblock);
ASSERT_EQ(st.ok(), true);
// check block_1 and block is same
- for (int i = 0; i < block->rows(); ++i) {
- auto& col = block->get_by_position(i);
- auto& col_1 = block_1->get_by_position(i);
+ for (auto col_idx = 0; col_idx < block->columns(); ++col_idx) {
+ auto& col = block->get_by_position(col_idx);
+ auto& col_1 = block_1->get_by_position(col_idx);
ASSERT_EQ(col.column->size(), col_1.column->size());
for (int j = 0; j < col.column->size(); ++j) {
ASSERT_EQ(col.column->operator[](j),
col_1.column->operator[](j));
diff --git a/be/test/vec/data_types/data_type_agg_state_test.cpp
b/be/test/vec/data_types/data_type_agg_state_test.cpp
new file mode 100644
index 00000000000..192573de4f3
--- /dev/null
+++ b/be/test/vec/data_types/data_type_agg_state_test.cpp
@@ -0,0 +1,257 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_agg_state.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <iostream>
+#include <memory>
+
+#include "agent/be_exec_version_manager.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_fixed_length_object.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/common_data_type_serder_test.h"
+#include "vec/data_types/common_data_type_test.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+
+// 1. datatype meta info:
+// get_type_id, get_type_as_type_descriptor, get_storage_field_type,
have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta
(PColumnMeta *col_meta)
+// get_family_name, get_is_parametric,
should_align_right_in_pretty_formats
+// text_can_contain_only_valid_utf8
+// have_maximum_size_of_value, get_maximum_size_of_value_in_memory,
get_size_of_value_in_memory
+// get_precision, get_scale
+// is_null_literal, is_value_represented_by_number,
is_value_unambiguously_represented_in_contiguous_memory_region
+// 2. datatype creation with column : create_column, create_column_const
(size_t size, const Field &field), create_column_const_with_default_value
(size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int
be_exec_version)
+// 3. serde related: get_serde (int nesting_level=1)
+// to_string (const IColumn &column, size_t row_num, BufferWritable
&ostr), to_string (const IColumn &column, size_t row_num), to_string_batch
(const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb,
IColumn *column)
+// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream
+// serialize (const IColumn &column, char *buf, int be_exec_version),
deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version)
+
+namespace doris::vectorized {
+
+class DataTypeAggStateTest : public ::testing::TestWithParam<int> {
+public:
+ void SetUp() override {
+ rows_value = GetParam();
+ helper = std::make_unique<CommonDataTypeTest>();
+ }
+ std::unique_ptr<CommonDataTypeTest> helper;
+ DataTypePtr sub_type = std::make_shared<DataTypeInt32>();
+ DataTypes sub_types = {sub_type};
+ // DataTypeAggState---> column_fixed_length_object
+ DataTypePtr datatype_agg_state_count = std::make_shared<DataTypeAggState>(
+ sub_types, false, "count",
BeExecVersionManager::get_newest_version());
+ // DataTypeAggState---> column_string
+ DataTypePtr datatype_agg_state_hll_union =
std::make_shared<DataTypeAggState>(
+ sub_types, false, "hll_union",
BeExecVersionManager::get_newest_version());
+ int rows_value;
+};
+
+TEST_P(DataTypeAggStateTest, MetaInfoTest) {
+ TypeDescriptor agg_state_type_descriptor = {PrimitiveType::TYPE_AGG_STATE};
+ auto col_meta = std::make_shared<PColumnMeta>();
+ col_meta->set_type(PGenericType_TypeId_AGG_STATE);
+ CommonDataTypeTest::DataTypeMetaInfo agg_state_meta_info_to_assert = {
+ .type_id = TypeIndex::AggState,
+ .type_as_type_descriptor = &agg_state_type_descriptor,
+ .family_name = "AggState",
+ .has_subtypes = false,
+ .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_AGG_STATE,
+ .should_align_right_in_pretty_formats = false,
+ .text_can_contain_only_valid_utf8 = false,
+ .have_maximum_size_of_value = false,
+ .size_of_value_in_memory = size_t(-1),
+ .precision = size_t(-1),
+ .scale = size_t(-1),
+ .is_null_literal = false,
+ .is_value_represented_by_number = false,
+ .pColumnMeta = col_meta.get(),
+ .is_value_unambiguously_represented_in_contiguous_memory_region =
true,
+ .default_field = Field(String()),
+ };
+ helper->meta_info_assert(datatype_agg_state_count,
agg_state_meta_info_to_assert);
+}
+
+TEST_P(DataTypeAggStateTest, CreateColumnTest) {
+ Field default_field = Field(String());
+ std::cout << "create_column_assert: " <<
datatype_agg_state_count->get_name() << std::endl;
+ auto column = (datatype_agg_state_count)->create_column();
+ ASSERT_EQ(column->size(), 0);
+ column->insert_default();
+ auto fixed_length_column = ColumnFixedLengthObject::create(8);
+ fixed_length_column->insert(default_field);
+ ASSERT_EQ(fixed_length_column->size(), 1);
+
+ for (int i = 0; i < 1; ++i) {
+ ASSERT_EQ(fixed_length_column->operator[](i), column->operator[](i));
+ }
+ // get_uncompressed_serialized_bytes
+ ASSERT_EQ(datatype_agg_state_count->get_uncompressed_serialized_bytes(
+ *column, BeExecVersionManager::get_newest_version()),
+ 25);
+}
+
+void insert_data_agg_state(MutableColumns* agg_state_cols, DataTypePtr
datatype_agg_state,
+ int rows_value, std::vector<std::string>* data_strs
= nullptr) {
+ auto column_fixed = datatype_agg_state->create_column();
+ agg_state_cols->push_back(column_fixed->get_ptr());
+ std::cout << "insert_data_agg_state: " << datatype_agg_state->get_name()
<< " "
+ << column_fixed->get_name() << std::endl;
+ if (column_fixed->is_column_string()) {
+ ASSERT_TRUE(is_string(assert_cast<const
DataTypeAggState*>(datatype_agg_state.get())
+ ->get_serialized_type()));
+ auto* column = assert_cast<ColumnString*>((*agg_state_cols)[0].get());
+ for (size_t i = 0; i != rows_value; ++i) {
+ auto val = std::to_string(i);
+ column->insert_data(val.c_str(), val.size());
+ if (data_strs) {
+ data_strs->push_back(val);
+ }
+ // std::cout<<"insert_data_agg_state: "<<val<<" "<<val.size()<<"
"<<column->get_data_at(i).to_string()<<std::endl;
+ }
+ } else {
+
assert_cast<ColumnFixedLengthObject*>((*agg_state_cols)[0].get())->set_item_size(8);
+ column_fixed->resize(rows_value);
+ ASSERT_TRUE(is_fixed_length_object(
+ assert_cast<const DataTypeAggState*>(datatype_agg_state.get())
+ ->get_serialized_type()));
+ auto& data =
assert_cast<ColumnFixedLengthObject*>((*agg_state_cols)[0].get())->get_data();
+ for (size_t i = 0; i != rows_value; ++i) {
+ data[i] = i;
+ }
+ }
+ std::cout << "finish insert data" << std::endl;
+}
+
+// unsupported function: get_field
+
+// test to_string | to_string_batch | from_string
+TEST_P(DataTypeAggStateTest, FromAndToStringTest) {
+ MutableColumns agg_state_cols;
+ std::vector<std::string> data_strs;
+ insert_data_agg_state(&agg_state_cols, datatype_agg_state_hll_union,
rows_value, &data_strs);
+
+ {
+ // to_string_batch | from_string
+ auto col_to = ColumnString::create();
+
datatype_agg_state_hll_union->to_string_batch(*agg_state_cols[0]->get_ptr(),
*col_to);
+ ASSERT_EQ(col_to->size(), agg_state_cols[0]->get_ptr()->size());
+ // from_string: parse each row of col_to into assert_column and check it
matches the original mutable column
+ auto assert_column = datatype_agg_state_hll_union->create_column();
+ for (int i = 0; i < col_to->size(); ++i) {
+ std::string s = col_to->get_data_at(i).to_string();
+ std::cout << "s: " << s << std::endl;
+ ReadBuffer rb(s.data(), s.size());
+ ASSERT_EQ(Status::OK(),
+ datatype_agg_state_hll_union->from_string(rb,
assert_column.get()));
+ ASSERT_EQ(assert_column->operator[](i),
agg_state_cols[0]->get_ptr()->operator[](i))
+ << "i: " << i << " s: " << s
+ << " datatype: " <<
datatype_agg_state_hll_union->get_name()
+ << " assert_column: " << assert_column->get_name()
+ << " mutableColumn:" <<
agg_state_cols[0]->get_ptr()->get_name() << std::endl;
+ }
+ std::cout << "finish to_string_batch | from_string test" << std::endl;
+ }
+
+ {
+ // to_string | from_string
+ auto ser_col = ColumnString::create();
+ ser_col->reserve(agg_state_cols[0]->get_ptr()->size());
+ VectorBufferWriter buffer_writer(*ser_col.get());
+ for (int i = 0; i < agg_state_cols[0]->get_ptr()->size(); ++i) {
+
datatype_agg_state_hll_union->to_string(*agg_state_cols[0]->get_ptr(), i,
+ buffer_writer);
+ std::string res =
+
datatype_agg_state_hll_union->to_string(*agg_state_cols[0]->get_ptr(), i);
+ buffer_writer.commit();
+ EXPECT_EQ(data_strs[i], ser_col->get_data_at(i).to_string());
+ }
+ // parse each row of ser_col into assert_column_1 and check it matches the original mutable column
+ auto assert_column_1 = datatype_agg_state_hll_union->create_column();
+ for (int i = 0; i < ser_col->size(); ++i) {
+ std::string s = ser_col->get_data_at(i).to_string();
+ ReadBuffer rb(s.data(), s.size());
+ ASSERT_EQ(Status::OK(),
+ datatype_agg_state_hll_union->from_string(rb,
assert_column_1.get()));
+ auto aaa = assert_column_1->operator[](i);
+ ASSERT_EQ(assert_column_1->operator[](i),
agg_state_cols[0]->get_ptr()->operator[](i));
+ }
+ std::cout << "finish to_string | from_string test" << std::endl;
+ }
+}
+
+// serialize / deserialize (hll_union agg state, held in a ColumnString)
+TEST_P(DataTypeAggStateTest, SerializeDeserializeTest) {
+ MutableColumns agg_state_cols;
+ insert_data_agg_state(&agg_state_cols, datatype_agg_state_hll_union,
rows_value);
+
+ auto* column = assert_cast<ColumnString*>(agg_state_cols[0].get());
+ auto size =
datatype_agg_state_hll_union->get_uncompressed_serialized_bytes(
+ *column, BeExecVersionManager::get_newest_version());
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ auto* result = datatype_agg_state_hll_union->serialize(
+ *column, buf.get(), BeExecVersionManager::get_newest_version());
+ ASSERT_EQ(result, buf.get() + size);
+
+ auto column2 = datatype_agg_state_hll_union->create_column();
+ datatype_agg_state_hll_union->deserialize(buf.get(), &column2,
+
BeExecVersionManager::get_newest_version());
+ for (size_t i = 0; i != rows_value; ++i) {
+ auto* column_res = assert_cast<ColumnString*>(column2.get());
+ ASSERT_EQ(column->get_data_at(i).to_string(),
column_res->get_data_at(i).to_string());
+ }
+ helper->serialize_deserialize_assert(agg_state_cols,
{datatype_agg_state_hll_union});
+ std::cout << "finish serialize deserialize test" << std::endl;
+}
+
+// serialize / deserialize (count agg state, held in a ColumnFixedLengthObject)
+TEST_P(DataTypeAggStateTest, SerializeDeserializeTest2) {
+ MutableColumns agg_state_cols;
+ insert_data_agg_state(&agg_state_cols, datatype_agg_state_count,
rows_value);
+
+ auto* column =
assert_cast<ColumnFixedLengthObject*>(agg_state_cols[0].get());
+ auto size = datatype_agg_state_count->get_uncompressed_serialized_bytes(
+ *column, BeExecVersionManager::get_newest_version());
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ auto* result = datatype_agg_state_count->serialize(*column, buf.get(),
+
BeExecVersionManager::get_newest_version());
+ ASSERT_EQ(result, buf.get() + size);
+
+ auto column2 = datatype_agg_state_count->create_column();
+ datatype_agg_state_count->deserialize(buf.get(), &column2,
+
BeExecVersionManager::get_newest_version());
+ for (size_t i = 0; i != rows_value; ++i) {
+ auto* column_res =
assert_cast<ColumnFixedLengthObject*>(column2.get());
+ ASSERT_EQ(column->get_data_at(i).to_string(),
column_res->get_data_at(i).to_string());
+ }
+ helper->serialize_deserialize_assert(agg_state_cols,
{datatype_agg_state_count});
+ std::cout << "finish serialize deserialize test2" << std::endl;
+}
+
+INSTANTIATE_TEST_SUITE_P(Params, DataTypeAggStateTest, ::testing::Values(0, 1,
31));
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/data_types/data_type_array_test.cpp
b/be/test/vec/data_types/data_type_array_test.cpp
index d50ae0be26b..1c0ee6f97ef 100644
--- a/be/test/vec/data_types/data_type_array_test.cpp
+++ b/be/test/vec/data_types/data_type_array_test.cpp
@@ -363,59 +363,59 @@ TEST_F(DataTypeArrayTest, CreateColumnTest) {
auto type = remove_nullable(array_types[i]);
// any different nested type in arr with same default array ?
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 16);
+ create_column_assert(type, default_field_array, 51); // 17 * 3
}
{
auto type = remove_nullable(array_types[13]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 24);
+ create_column_assert(type, default_field_array, 59); // adds an additional
8 bytes
}
// for decimal32/64/128/256 here uncompressed size is 16
// one scalar type
for (int i = 14; i < 18; i++) {
auto type = remove_nullable(array_types[i]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 16);
+ create_column_assert(type, default_field_array, 51);
}
// for array-array-scala
for (int i = 18; i < 31; i++) {
auto type = remove_nullable(array_types[i]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 28);
+ create_column_assert(type, default_field_array, 85); // 17 * 5
}
{
// string type
auto type = remove_nullable(array_types[31]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 36);
+ create_column_assert(type, default_field_array, 93); // adds an additional
8 bytes
}
for (int i = 32; i < 36; i++) {
auto type = remove_nullable(array_types[i]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 28);
+ create_column_assert(type, default_field_array, 85); // 17 * 5
}
// for array-map
{
auto type = remove_nullable(array_types[36]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 44);
+ create_column_assert(type, default_field_array, 127); // 17 * 7 + 8:
an additional 8 bytes
type = remove_nullable(array_types[39]);
default_field_array = Array();
- create_column_assert(type, default_field_array, 44);
+ create_column_assert(type, default_field_array, 127);
}
{
auto type = remove_nullable(array_types[37]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 36);
+ create_column_assert(type, default_field_array, 119);
type = remove_nullable(array_types[38]);
default_field_array = Array();
- create_column_assert(type, default_field_array, 36);
+ create_column_assert(type, default_field_array, 119); // 17 * 7
}
// for array-struct
{
auto type = remove_nullable(array_types[40]);
Field default_field_array = Array();
- create_column_assert(type, default_field_array, 76);
+ create_column_assert(type, default_field_array, 297); // 17 * 17
}
}
diff --git a/be/test/vec/data_types/data_type_bitmap_test.cpp
b/be/test/vec/data_types/data_type_bitmap_test.cpp
new file mode 100644
index 00000000000..58291f06a79
--- /dev/null
+++ b/be/test/vec/data_types/data_type_bitmap_test.cpp
@@ -0,0 +1,218 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_bitmap.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include "agent/be_exec_version_manager.h"
+#include "util/bitmap_value.h"
+#include "vec/columns/column.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/common_data_type_serder_test.h"
+#include "vec/data_types/common_data_type_test.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+
+// 1. datatype meta info:
+// get_type_id, get_type_as_type_descriptor, get_storage_field_type,
have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta
(PColumnMeta *col_meta)
+// get_family_name, get_is_parametric,
should_align_right_in_pretty_formats
+// text_can_contain_only_valid_utf8
+// have_maximum_size_of_value, get_maximum_size_of_value_in_memory,
get_size_of_value_in_memory
+// get_precision, get_scale
+// is_null_literal, is_value_represented_by_number,
is_value_unambiguously_represented_in_contiguous_memory_region
+// 2. datatype creation with column : create_column, create_column_const
(size_t size, const Field &field), create_column_const_with_default_value
(size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int
be_exec_version)
+// 3. serde related: get_serde (int nesting_level=1)
+// to_string (const IColumn &column, size_t row_num, BufferWritable
&ostr), to_string (const IColumn &column, size_t row_num), to_string_batch
(const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb,
IColumn *column)
+// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream
+// serialize (const IColumn &column, char *buf, int be_exec_version),
deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version)
+
+namespace doris::vectorized {
+
+class DataTypeBitMapTest : public ::testing::TestWithParam<int> {
+public:
+ void SetUp() override {
+ rows_value = GetParam();
+ helper = std::make_unique<CommonDataTypeTest>();
+ }
+ std::unique_ptr<CommonDataTypeTest> helper;
+ DataTypePtr dt_bitmap =
+
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_OBJECT,
0, 0);
+ int rows_value;
+};
+
+TEST_P(DataTypeBitMapTest, MetaInfoTest) {
+ TypeDescriptor bitmap_type_descriptor = {PrimitiveType::TYPE_OBJECT};
+ auto col_meta = std::make_shared<PColumnMeta>();
+ col_meta->set_type(PGenericType_TypeId_BITMAP);
+ CommonDataTypeTest::DataTypeMetaInfo bitmap_meta_info_to_assert = {
+ .type_id = TypeIndex::BitMap,
+ .type_as_type_descriptor = &bitmap_type_descriptor,
+ .family_name = "BitMap",
+ .has_subtypes = false,
+ .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_OBJECT,
+ .should_align_right_in_pretty_formats = false,
+ .text_can_contain_only_valid_utf8 = true,
+ .have_maximum_size_of_value = false,
+ .size_of_value_in_memory = size_t(-1),
+ .precision = size_t(-1),
+ .scale = size_t(-1),
+ .is_null_literal = false,
+ .is_value_represented_by_number = false,
+ .pColumnMeta = col_meta.get(),
+ .is_value_unambiguously_represented_in_contiguous_memory_region =
true,
+ .default_field = BitmapValue::empty_bitmap(),
+ };
+ helper->meta_info_assert(dt_bitmap, bitmap_meta_info_to_assert);
+}
+
+TEST_P(DataTypeBitMapTest, CreateColumnTest) {
+ Field default_field_bitmap = BitmapValue::empty_bitmap();
+ helper->create_column_assert(dt_bitmap, default_field_bitmap, 17);
+}
+
+void insert_data_bitmap(MutableColumns* bitmap_cols, DataTypePtr dt_bitmap,
int rows_value,
+ std::vector<std::string>* data_strs = nullptr) {
+ auto serde_bitmap = dt_bitmap->get_serde(1);
+ auto column_bitmap = dt_bitmap->create_column();
+
+ bitmap_cols->push_back(column_bitmap->get_ptr());
+ DataTypeSerDeSPtrs serde = {dt_bitmap->get_serde()};
+ auto& data =
assert_cast<ColumnBitmap*>((*bitmap_cols)[0].get())->get_data();
+ for (size_t i = 0; i != rows_value; ++i) {
+ BitmapValue bitmap_value;
+ for (size_t j = 0; j <= i; ++j) {
+ bitmap_value.add(j);
+ }
+ if (data_strs) {
+ data_strs->push_back(bitmap_value.to_string());
+ }
+ std::string memory_buffer(bitmap_value.getSizeInBytes(), '0');
+ bitmap_value.write_to(memory_buffer.data());
+ data.emplace_back(std::move(bitmap_value));
+ }
+ std::cout << "finish insert data" << std::endl;
+}
+
+// unsupported function: get_field
+
+// test to_string | to_string_batch | from_string
+TEST_P(DataTypeBitMapTest, FromAndToStringTest) {
+ MutableColumns bitmap_cols;
+ std::vector<std::string> data_strs;
+ insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value, &data_strs);
+
+ {
+ // to_string_batch | from_string
+ auto col_to = ColumnString::create();
+ dt_bitmap->to_string_batch(*bitmap_cols[0]->get_ptr(), *col_to);
+ ASSERT_EQ(col_to->size(), bitmap_cols[0]->get_ptr()->size());
+ // from_string: parse each row of col_to into assert_column and check it
matches the original mutable column
+ auto assert_column = dt_bitmap->create_column();
+ for (int i = 0; i < col_to->size(); ++i) {
+ std::string s = col_to->get_data_at(i).to_string();
+ ReadBuffer rb(s.data(), s.size());
+ ASSERT_EQ(Status::OK(), dt_bitmap->from_string(rb,
assert_column.get()));
+ ASSERT_EQ(assert_column->operator[](i),
bitmap_cols[0]->get_ptr()->operator[](i))
+ << "i: " << i << " s: " << s << " datatype: " <<
dt_bitmap->get_name()
+ << " assert_column: " << assert_column->get_name()
+ << " mutableColumn:" <<
bitmap_cols[0]->get_ptr()->get_name() << std::endl;
+ }
+ std::cout << "finish to_string_batch | from_string test" << std::endl;
+ }
+
+ {
+ // to_string | from_string
+ auto ser_col = ColumnString::create();
+ ser_col->reserve(bitmap_cols[0]->get_ptr()->size());
+ VectorBufferWriter buffer_writer(*ser_col.get());
+ for (int i = 0; i < bitmap_cols[0]->get_ptr()->size(); ++i) {
+ dt_bitmap->to_string(*bitmap_cols[0]->get_ptr(), i, buffer_writer);
+ std::string res = dt_bitmap->to_string(*bitmap_cols[0]->get_ptr(),
i);
+ buffer_writer.commit();
+ EXPECT_EQ(res, data_strs[i]);
+ }
+ // parse each row of ser_col into assert_column_1 and check it matches the original mutable column
+ auto assert_column_1 = dt_bitmap->create_column();
+ for (int i = 0; i < ser_col->size(); ++i) {
+ std::string s = ser_col->get_data_at(i).to_string();
+ ReadBuffer rb(s.data(), s.size());
+ ASSERT_EQ(Status::OK(), dt_bitmap->from_string(rb,
assert_column_1.get()));
+ auto aaa = assert_column_1->operator[](i);
+ ASSERT_EQ(assert_column_1->operator[](i),
bitmap_cols[0]->get_ptr()->operator[](i));
+ }
+ std::cout << "finish to_string | from_string test" << std::endl;
+ }
+}
+
+// serialize / deserialize
+TEST_P(DataTypeBitMapTest, SerializeDeserializeTest) {
+ MutableColumns bitmap_cols;
+ insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value);
+
+ auto* column = assert_cast<ColumnBitmap*>(bitmap_cols[0].get());
+ auto size = dt_bitmap->get_uncompressed_serialized_bytes(
+ *column, BeExecVersionManager::get_newest_version());
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ auto* result =
+ dt_bitmap->serialize(*column, buf.get(),
BeExecVersionManager::get_newest_version());
+ ASSERT_EQ(result, buf.get() + size);
+
+ auto column2 = dt_bitmap->create_column();
+ dt_bitmap->deserialize(buf.get(), &column2,
BeExecVersionManager::get_newest_version());
+ for (size_t i = 0; i != rows_value; ++i) {
+ auto* column_res = assert_cast<ColumnBitmap*>(column2.get());
+ ASSERT_EQ(column->get_data()[i].to_string(),
column_res->get_data()[i].to_string());
+ }
+ helper->serialize_deserialize_assert(bitmap_cols, {dt_bitmap});
+ std::cout << "finish serialize deserialize test" << std::endl;
+}
+
+// serialize / deserialize as stream
+TEST_P(DataTypeBitMapTest, SerializeDeserializeAsStreamTest) {
+ MutableColumns bitmap_cols;
+ insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value);
+
+ auto ser_col = ColumnString::create();
+ VectorBufferWriter buffer_writer(*ser_col.get());
+ auto* column_data = assert_cast<ColumnBitmap*>(bitmap_cols[0].get());
+ auto c = dt_bitmap->create_column();
+ auto* column_res = assert_cast<ColumnBitmap*>(c.get());
+ column_res->resize(rows_value);
+ for (size_t i = 0; i != rows_value; ++i) {
+
doris::vectorized::DataTypeBitMap::serialize_as_stream(column_data->get_element(i),
+ buffer_writer);
+ buffer_writer.commit();
+ BufferReadable buffer_readable(ser_col->get_data_at(i));
+
doris::vectorized::DataTypeBitMap::deserialize_as_stream(column_res->get_element(i),
+
buffer_readable);
+ ASSERT_EQ(column_data->get_data()[i].to_string(),
column_res->get_data()[i].to_string());
+ }
+ std::cout << "finish serialize deserialize as stream test" << std::endl;
+}
+// sh run-be-ut.sh --run --filter=Params/DataTypeBitMapTest.*
+// rows_value must cover every bitmap type: empty/single/set/bitmap
+INSTANTIATE_TEST_SUITE_P(Params, DataTypeBitMapTest, ::testing::Values(0, 1,
31, 1024));
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/data_types/data_type_fixed_length_object_test.cpp
b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp
new file mode 100644
index 00000000000..09762819eb7
--- /dev/null
+++ b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_fixed_length_object.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include "agent/be_exec_version_manager.h"
+#include "util/bitmap_value.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_fixed_length_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/common_data_type_serder_test.h"
+#include "vec/data_types/common_data_type_test.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+
+// 1. datatype meta info:
+// get_type_id, get_type_as_type_descriptor, get_storage_field_type,
have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta
(PColumnMeta *col_meta)
+// get_family_name, get_is_parametric,
should_align_right_in_pretty_formats
+// text_can_contain_only_valid_utf8
+// have_maximum_size_of_value, get_maximum_size_of_value_in_memory,
get_size_of_value_in_memory
+// get_precision, get_scale
+// is_null_literal, is_value_represented_by_number,
is_value_unambiguously_represented_in_contiguous_memory_region
+// 2. datatype creation with column : create_column, create_column_const
(size_t size, const Field &field), create_column_const_with_default_value
(size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int
be_exec_version)
+// 3. serde related: get_serde (int nesting_level=1)
+// to_string (const IColumn &column, size_t row_num, BufferWritable
&ostr), to_string (const IColumn &column, size_t row_num), to_string_batch
(const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb,
IColumn *column)
+// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream
+// serialize (const IColumn &column, char *buf, int be_exec_version),
deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version)
+
+namespace doris::vectorized {
+
+// Parameterized fixture for the DataTypeFixedLengthObject tests. The int test
+// parameter is stored in rows_value and used by the tests as the number of rows
+// to generate.
+class DataTypeFixedLengthObjectTest : public ::testing::TestWithParam<int> {
+public:
+    // Copies the test parameter into rows_value and creates the shared assertion helper.
+    void SetUp() override {
+        rows_value = GetParam();
+        helper = std::make_unique<CommonDataTypeTest>();
+    }
+    std::unique_ptr<CommonDataTypeTest> helper;
+    // Zero-initialized so the member never holds an indeterminate value before SetUp() runs.
+    int rows_value = 0;
+    DataTypePtr datatype_fixed_length = std::make_shared<DataTypeFixedLengthObject>();
+};
+
+// Compares the metadata reported by DataTypeFixedLengthObject with the expected
+// values listed below, using the shared meta_info_assert helper.
+TEST_P(DataTypeFixedLengthObjectTest, MetaInfoTest) {
+    TypeDescriptor expected_descriptor = {PrimitiveType::INVALID_TYPE};
+    auto expected_pb_meta = std::make_shared<PColumnMeta>();
+    expected_pb_meta->set_type(PGenericType_TypeId_FIXEDLENGTHOBJECT);
+    CommonDataTypeTest::DataTypeMetaInfo expected_meta_info = {
+            .type_id = TypeIndex::FixedLengthObject,
+            .type_as_type_descriptor = &expected_descriptor,
+            .family_name = "DataTypeFixedLengthObject",
+            .has_subtypes = false,
+            .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_NONE,
+            .should_align_right_in_pretty_formats = false,
+            .text_can_contain_only_valid_utf8 = false,
+            .have_maximum_size_of_value = false,
+            .size_of_value_in_memory = static_cast<size_t>(-1),
+            .precision = static_cast<size_t>(-1),
+            .scale = static_cast<size_t>(-1),
+            .is_null_literal = false,
+            .is_value_represented_by_number = false,
+            .pColumnMeta = expected_pb_meta.get(),
+            .is_value_unambiguously_represented_in_contiguous_memory_region = false,
+            .default_field = Field(String()),
+    };
+    helper->meta_info_assert(datatype_fixed_length, expected_meta_info);
+}
+
+// Checks that create_column() yields an empty column, that a default Field can be
+// inserted into an 8-byte-item ColumnFixedLengthObject and copied to a second one,
+// and that the empty column reports 17 uncompressed serialized bytes.
+TEST_P(DataTypeFixedLengthObjectTest, CreateColumnTest) {
+    Field empty_string_field = Field(String());
+    std::cout << "create_column_assert: " << datatype_fixed_length->get_name() << std::endl;
+    auto empty_column = (datatype_fixed_length)->create_column();
+    ASSERT_EQ(empty_column->size(), 0);
+
+    auto source_column = ColumnFixedLengthObject::create(8);
+    source_column->insert(empty_string_field);
+    ASSERT_EQ(source_column->size(), 1);
+
+    // Copy the raw bytes of the only row into a second column of the same item size.
+    auto copied_column = ColumnFixedLengthObject::create(8);
+    auto first_row = source_column->get_data_at(0);
+    copied_column->insert_data(first_row.data, first_row.size);
+    ASSERT_EQ(source_column->operator[](0), copied_column->operator[](0));
+
+    // get_uncompressed_serialized_bytes
+    ASSERT_EQ(datatype_fixed_length->get_uncompressed_serialized_bytes(
+                      *empty_column, BeExecVersionManager::get_newest_version()),
+              17);
+}
+
+// Appends one ColumnFixedLengthObject (item size sizeof(size_t)) resized to
+// rows_value rows to *fixed_length_cols and fills it through get_data().
+//
+// fixed_length_cols     : destination vector; the data is written into element [0],
+//                         so callers are expected to pass an empty vector.
+// datatype_fixed_length : kept for signature compatibility; not used here.
+// rows_value            : number of rows to create.
+// data_strs             : kept for signature compatibility; not used here.
+void insert_data_fixed_length_data(MutableColumns* fixed_length_cols,
+                                   DataTypePtr datatype_fixed_length, int rows_value,
+                                   std::vector<std::string>* data_strs = nullptr) {
+    auto column_fixed = ColumnFixedLengthObject::create(sizeof(size_t));
+    column_fixed->resize(rows_value);
+    fixed_length_cols->push_back(column_fixed->get_ptr());
+    auto& data =
+            assert_cast<ColumnFixedLengthObject*>((*fixed_length_cols)[0].get())->get_data();
+    // NOTE(review): this writes one get_data() element per row. If get_data() exposes
+    // a byte buffer of rows * item_size bytes, only the first rows_value bytes are set
+    // and i is truncated to a byte -- confirm this is the intended fixture content.
+    for (size_t i = 0; i != rows_value; ++i) {
+        data[i] = i;
+    }
+    std::cout << "finish insert data" << std::endl;
+}
+
+// unsupported functions: get_field | to_string | to_string_batch | from_string
+
+// Whole-column round trip: serialize() into a buffer sized by
+// get_uncompressed_serialized_bytes(), deserialize() into a fresh column, compare.
+TEST_P(DataTypeFixedLengthObjectTest, SerializeDeserializeTest) {
+    MutableColumns source_cols;
+    insert_data_fixed_length_data(&source_cols, datatype_fixed_length, rows_value);
+
+    auto* source = assert_cast<ColumnFixedLengthObject*>(source_cols[0].get());
+    auto buffer_size = datatype_fixed_length->get_uncompressed_serialized_bytes(
+            *source, BeExecVersionManager::get_newest_version());
+    auto buffer = std::make_unique<char[]>(buffer_size);
+    auto* write_end = datatype_fixed_length->serialize(
+            *source, buffer.get(), BeExecVersionManager::get_newest_version());
+    // The pointer returned by serialize() is expected to be the end of the buffer.
+    ASSERT_EQ(write_end, buffer.get() + buffer_size);
+
+    auto restored = datatype_fixed_length->create_column();
+    datatype_fixed_length->deserialize(buffer.get(), &restored,
+                                       BeExecVersionManager::get_newest_version());
+    for (size_t row = 0; row != rows_value; ++row) {
+        auto* restored_col = assert_cast<ColumnFixedLengthObject*>(restored.get());
+        ASSERT_EQ(source->get_data()[row], restored_col->get_data()[row]);
+    }
+    helper->serialize_deserialize_assert(source_cols, {datatype_fixed_length});
+    std::cout << "finish serialize deserialize test" << std::endl;
+}
+
+INSTANTIATE_TEST_SUITE_P(Params, DataTypeFixedLengthObjectTest,
::testing::Values(0, 1, 31, 1024)); // each value becomes rows_value (via GetParam() in SetUp) for one run of every test
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/data_types/data_type_hll_test.cpp
b/be/test/vec/data_types/data_type_hll_test.cpp
new file mode 100644
index 00000000000..e16f6045217
--- /dev/null
+++ b/be/test/vec/data_types/data_type_hll_test.cpp
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_hll.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include "agent/be_exec_version_manager.h"
+#include "vec/columns/column.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/common_data_type_serder_test.h"
+#include "vec/data_types/common_data_type_test.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+
+// This test is meant to be a template for data type unit tests: every DataType should have a UT that covers the functions listed below.
+// For example, DataTypeHLL should test these functions:
+// 1. datatype meta info:
+// get_type_id, get_type_as_type_descriptor, get_storage_field_type,
have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta
(PColumnMeta *col_meta)
+// get_family_name, get_is_parametric,
should_align_right_in_pretty_formats
+// text_can_contain_only_valid_utf8
+// have_maximum_size_of_value, get_maximum_size_of_value_in_memory,
get_size_of_value_in_memory
+// get_precision, get_scale
+// is_null_literal, is_value_represented_by_number,
is_value_unambiguously_represented_in_contiguous_memory_region
+// 2. datatype creation with column : create_column, create_column_const
(size_t size, const Field &field), create_column_const_with_default_value
(size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int
be_exec_version)
+// 3. serde related: get_serde (int nesting_level=1)
+// to_string (const IColumn &column, size_t row_num, BufferWritable
&ostr), to_string (const IColumn &column, size_t row_num), to_string_batch
(const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb,
IColumn *column)
+// serialize (const IColumn &column, char *buf, int be_exec_version),
deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version)
+
+namespace doris::vectorized {
+
+// Parameterized fixture for the HLL data type tests. The int test parameter is
+// stored in rows_value and used by the tests as the number of rows to generate.
+class DataTypeHLLTest : public ::testing::TestWithParam<int> {
+protected:
+    // Copies the test parameter into rows_value and creates the shared assertion helper.
+    void SetUp() override {
+        rows_value = GetParam();
+        helper = std::make_unique<CommonDataTypeTest>();
+    }
+
+public:
+    std::unique_ptr<CommonDataTypeTest> helper;
+    // Zero-initialized so the member never holds an indeterminate value before SetUp() runs.
+    int rows_value = 0;
+    // HLL data type obtained from the factory by its storage field type.
+    DataTypePtr dt_hll =
+            DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_HLL, 0, 0);
+};
+
+// Compares the metadata reported by the HLL data type with the expected values
+// listed below, using the shared meta_info_assert helper.
+TEST_P(DataTypeHLLTest, MetaInfoTest) {
+    TypeDescriptor expected_descriptor = {PrimitiveType::TYPE_HLL};
+    auto expected_pb_meta = std::make_shared<PColumnMeta>();
+    expected_pb_meta->set_type(PGenericType_TypeId_HLL);
+    CommonDataTypeTest::DataTypeMetaInfo expected_meta_info = {
+            .type_id = TypeIndex::HLL,
+            .type_as_type_descriptor = &expected_descriptor,
+            .family_name = "HLL",
+            .has_subtypes = false,
+            .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_HLL,
+            .should_align_right_in_pretty_formats = false,
+            .text_can_contain_only_valid_utf8 = true,
+            .have_maximum_size_of_value = false,
+            .size_of_value_in_memory = static_cast<size_t>(-1),
+            .precision = static_cast<size_t>(-1),
+            .scale = static_cast<size_t>(-1),
+            .is_null_literal = false,
+            .is_value_represented_by_number = false,
+            .pColumnMeta = expected_pb_meta.get(),
+            .is_value_unambiguously_represented_in_contiguous_memory_region = true,
+            .default_field = HyperLogLog::empty(),
+    };
+    helper->meta_info_assert(dt_hll, expected_meta_info);
+}
+
+// Delegates to the shared create_column_assert helper with an empty HyperLogLog
+// as the default field.
+// NOTE(review): 17 is presumably the expected get_uncompressed_serialized_bytes()
+// of an empty column -- confirm against the helper.
+TEST_P(DataTypeHLLTest, CreateColumnTest) {
+    Field empty_hll_field = HyperLogLog::empty();
+    helper->create_column_assert(dt_hll, empty_hll_field, 17);
+}
+
+// Appends a new HLL column to *hll_cols and fills it with rows_value values;
+// row i holds a HyperLogLog that was updated with 0..i.
+//
+// hll_cols     : destination vector; the data is written into element [0], so
+//                callers are expected to pass an empty vector.
+// datetype_hll : data type used to create the column.
+// rows_value   : number of rows to create.
+// data_strs    : when non-null, receives to_string() of every generated value.
+void insert_data_hll(MutableColumns* hll_cols, DataTypePtr datetype_hll, int rows_value,
+                     std::vector<std::string>* data_strs = nullptr) {
+    auto column_hll = datetype_hll->create_column();
+
+    hll_cols->push_back(column_hll->get_ptr());
+    auto& data = assert_cast<ColumnHLL*>((*hll_cols)[0].get())->get_data();
+    for (size_t i = 0; i != rows_value; ++i) {
+        HyperLogLog hll_value;
+        for (size_t j = 0; j <= i; ++j) {
+            hll_value.update(j);
+        }
+        if (data_strs) {
+            data_strs->push_back(hll_value.to_string());
+        }
+        // NOTE(review): the serialized bytes are discarded; this call only exercises
+        // serialize() into a max_serialized_size() buffer. Kept as in the original
+        // in case it is intentional -- confirm.
+        std::string memory_buffer(hll_value.max_serialized_size(), '0');
+        hll_value.serialize(reinterpret_cast<uint8_t*>(memory_buffer.data()));
+        data.emplace_back(std::move(hll_value));
+    }
+    std::cout << "finish insert data" << std::endl;
+}
+
+// Exercises to_string, to_string_batch and from_string of the HLL data type:
+// each textual form is parsed back with from_string and the resulting row is
+// compared with the source column.
+TEST_P(DataTypeHLLTest, FromAndToStringTest) {
+    MutableColumns hll_cols;
+    std::vector<std::string> data_strs;
+    insert_data_hll(&hll_cols, dt_hll, rows_value, &data_strs);
+
+    {
+        // to_string_batch | from_string
+        auto col_to = ColumnString::create();
+        dt_hll->to_string_batch(*hll_cols[0]->get_ptr(), *col_to);
+        ASSERT_EQ(col_to->size(), hll_cols[0]->get_ptr()->size());
+        // Parse every row of col_to back into assert_column and check that it
+        // matches the corresponding row of the source column.
+        auto assert_column = dt_hll->create_column();
+        for (size_t i = 0; i < col_to->size(); ++i) {
+            std::string s = col_to->get_data_at(i).to_string();
+            ReadBuffer rb(s.data(), s.size());
+            ASSERT_EQ(Status::OK(), dt_hll->from_string(rb, assert_column.get()));
+            ASSERT_EQ(assert_column->operator[](i), hll_cols[0]->get_ptr()->operator[](i))
+                    << "i: " << i << " s: " << s << " datatype: " << dt_hll->get_name()
+                    << " assert_column: " << assert_column->get_name()
+                    << " mutableColumn:" << hll_cols[0]->get_ptr()->get_name() << std::endl;
+        }
+        std::cout << "finish to_string_batch | from_string test" << std::endl;
+    }
+
+    {
+        // to_string | from_string
+        auto ser_col = ColumnString::create();
+        ser_col->reserve(hll_cols[0]->get_ptr()->size());
+        VectorBufferWriter buffer_writer(*ser_col.get());
+        for (size_t i = 0; i < hll_cols[0]->get_ptr()->size(); ++i) {
+            dt_hll->to_string(*hll_cols[0]->get_ptr(), i, buffer_writer);
+            std::string res = dt_hll->to_string(*hll_cols[0]->get_ptr(), i);
+            buffer_writer.commit();
+            EXPECT_EQ(res, "HLL()"); // HLL to_string is not implemented
+        }
+        // Parse every row of ser_col back into assert_column_1 and check that it
+        // matches the corresponding row of the source column.
+        auto assert_column_1 = dt_hll->create_column();
+        for (size_t i = 0; i < ser_col->size(); ++i) {
+            std::string s = ser_col->get_data_at(i).to_string();
+            ReadBuffer rb(s.data(), s.size());
+            ASSERT_EQ(Status::OK(), dt_hll->from_string(rb, assert_column_1.get()));
+            ASSERT_EQ(assert_column_1->operator[](i), hll_cols[0]->get_ptr()->operator[](i));
+        }
+        std::cout << "finish to_string | from_string test" << std::endl;
+    }
+}
+
+// Whole-column round trip: serialize() into a buffer sized by
+// get_uncompressed_serialized_bytes(), deserialize() into a fresh column, compare.
+TEST_P(DataTypeHLLTest, SerializeDeserializeTest) {
+    MutableColumns source_cols;
+    insert_data_hll(&source_cols, dt_hll, rows_value);
+
+    auto* source = assert_cast<ColumnHLL*>(source_cols[0].get());
+    auto buffer_size = dt_hll->get_uncompressed_serialized_bytes(
+            *source, BeExecVersionManager::get_newest_version());
+    auto buffer = std::make_unique<char[]>(buffer_size);
+    auto* write_end =
+            dt_hll->serialize(*source, buffer.get(), BeExecVersionManager::get_newest_version());
+    // The pointer returned by serialize() is expected to be the end of the buffer.
+    ASSERT_EQ(write_end, buffer.get() + buffer_size);
+
+    auto restored = dt_hll->create_column();
+    dt_hll->deserialize(buffer.get(), &restored, BeExecVersionManager::get_newest_version());
+    for (size_t row = 0; row != rows_value; ++row) {
+        auto* restored_col = assert_cast<ColumnHLL*>(restored.get());
+        // Values are compared through their to_string() form.
+        ASSERT_EQ(source->get_data()[row].to_string(), restored_col->get_data()[row].to_string());
+    }
+    helper->serialize_deserialize_assert(source_cols, {dt_hll});
+    std::cout << "finish serialize deserialize test" << std::endl;
+}
+
+// Stream round trip: each HLL value is written with serialize_as_stream into a
+// ColumnString and read back with deserialize_as_stream into a second column.
+TEST_P(DataTypeHLLTest, SerializeDeserializeAsStreamTest) {
+    MutableColumns source_cols;
+    insert_data_hll(&source_cols, dt_hll, rows_value);
+
+    auto stream_col = ColumnString::create();
+    VectorBufferWriter writer(*stream_col.get());
+    auto* expected = assert_cast<ColumnHLL*>(source_cols[0].get());
+    auto decoded_holder = dt_hll->create_column();
+    auto* decoded = assert_cast<ColumnHLL*>(decoded_holder.get());
+    // Pre-size the destination so get_element(row) addresses an existing slot.
+    decoded->resize(rows_value);
+    for (size_t row = 0; row != rows_value; ++row) {
+        DataTypeHLL::serialize_as_stream(expected->get_element(row), writer);
+        // After the commit the test reads row `row` of stream_col back.
+        writer.commit();
+        BufferReadable reader(stream_col->get_data_at(row));
+        DataTypeHLL::deserialize_as_stream(decoded->get_element(row), reader);
+        ASSERT_EQ(expected->get_data()[row].to_string(), decoded->get_data()[row].to_string());
+    }
+    std::cout << "finish serialize deserialize as stream test" << std::endl;
+}
+
+INSTANTIATE_TEST_SUITE_P(Params, DataTypeHLLTest, ::testing::Values(0, 1, 10,
100)); // each value becomes rows_value (via GetParam() in SetUp) for one run of every test
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/data_types/data_type_ip_test.cpp
b/be/test/vec/data_types/data_type_ip_test.cpp
index c500c7cf2dd..d26806b7dcb 100644
--- a/be/test/vec/data_types/data_type_ip_test.cpp
+++ b/be/test/vec/data_types/data_type_ip_test.cpp
@@ -126,8 +126,8 @@ TEST_F(DataTypeIPTest, MetaInfoTest) {
TEST_F(DataTypeIPTest, CreateColumnTest) {
Field default_field_ipv4 = IPv4(0);
Field default_field_ipv6 = IPv6(0);
- create_column_assert(dt_ipv4, default_field_ipv4, 4);
- create_column_assert(dt_ipv6, default_field_ipv6, 4);
+ create_column_assert(dt_ipv4, default_field_ipv4, 17);
+ create_column_assert(dt_ipv6, default_field_ipv6, 17);
}
TEST_F(DataTypeIPTest, GetFieldTest) {
diff --git a/be/test/vec/data_types/data_type_quantile_state_test.cpp
b/be/test/vec/data_types/data_type_quantile_state_test.cpp
new file mode 100644
index 00000000000..dcd8d58503c
--- /dev/null
+++ b/be/test/vec/data_types/data_type_quantile_state_test.cpp
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include "agent/be_exec_version_manager.h"
+#include "vec/columns/column.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/common_data_type_serder_test.h"
+#include "vec/data_types/common_data_type_test.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_quantilestate.h"
+
+// This test is meant to be a template for data type unit tests: every DataType should have a UT that covers the functions listed below.
+// For example, DataTypeQuantileState should test these functions:
+// 1. datatype meta info:
+// get_type_id, get_type_as_type_descriptor, get_storage_field_type,
have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta
(PColumnMeta *col_meta)
+// get_family_name, get_is_parametric,
should_align_right_in_pretty_formats
+// text_can_contain_only_valid_utf8
+// have_maximum_size_of_value, get_maximum_size_of_value_in_memory,
get_size_of_value_in_memory
+// get_precision, get_scale
+// is_null_literal, is_value_represented_by_number,
is_value_unambiguously_represented_in_contiguous_memory_region
+// 2. datatype creation with column : create_column, create_column_const
(size_t size, const Field &field), create_column_const_with_default_value
(size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int
be_exec_version)
+// 3. serde related: get_serde (int nesting_level=1)
+// to_string (const IColumn &column, size_t row_num, BufferWritable
&ostr), to_string (const IColumn &column, size_t row_num), to_string_batch
(const IColumn &column, ColumnString &column_to)
+// serialize (const IColumn &column, char *buf, int be_exec_version),
deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version)
+
+namespace doris::vectorized {
+
+// Parameterized fixture for the QuantileState data type tests. The int test
+// parameter is stored in rows_value and used by the tests as the number of rows
+// to generate.
+class DataTypeQuantileStateTest : public ::testing::TestWithParam<int> {
+protected:
+    // Copies the test parameter into rows_value and creates the shared assertion helper.
+    void SetUp() override {
+        rows_value = GetParam();
+        helper = std::make_unique<CommonDataTypeTest>();
+    }
+
+public:
+    std::unique_ptr<CommonDataTypeTest> helper;
+    // Zero-initialized so the member never holds an indeterminate value before SetUp() runs.
+    int rows_value = 0;
+    // QuantileState data type obtained from the factory by its storage field type.
+    DataTypePtr datatype_quantile_state = DataTypeFactory::instance().create_data_type(
+            FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE, 0, 0);
+};
+
+// Compares the metadata reported by the QuantileState data type with the expected
+// values listed below, using the shared meta_info_assert helper.
+TEST_P(DataTypeQuantileStateTest, MetaInfoTest) {
+    TypeDescriptor expected_descriptor = {PrimitiveType::TYPE_QUANTILE_STATE};
+    auto expected_pb_meta = std::make_shared<PColumnMeta>();
+    expected_pb_meta->set_type(PGenericType_TypeId_QUANTILE_STATE);
+    CommonDataTypeTest::DataTypeMetaInfo expected_meta_info = {
+            .type_id = TypeIndex::QuantileState,
+            .type_as_type_descriptor = &expected_descriptor,
+            .family_name = "QuantileState",
+            .has_subtypes = false,
+            .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE,
+            .should_align_right_in_pretty_formats = false,
+            .text_can_contain_only_valid_utf8 = true,
+            .have_maximum_size_of_value = false,
+            .size_of_value_in_memory = static_cast<size_t>(-1),
+            .precision = static_cast<size_t>(-1),
+            .scale = static_cast<size_t>(-1),
+            .is_null_literal = false,
+            .is_value_represented_by_number = false,
+            .pColumnMeta = expected_pb_meta.get(),
+            .is_value_unambiguously_represented_in_contiguous_memory_region = true,
+            .default_field = QuantileState(),
+    };
+    helper->meta_info_assert(datatype_quantile_state, expected_meta_info);
+}
+
+// Delegates to the shared create_column_assert helper with a default-constructed
+// QuantileState as the default field.
+// NOTE(review): 17 is presumably the expected get_uncompressed_serialized_bytes()
+// of an empty column -- confirm against the helper.
+TEST_P(DataTypeQuantileStateTest, CreateColumnTest) {
+    Field empty_state_field = QuantileState();
+    helper->create_column_assert(datatype_quantile_state, empty_state_field, 17);
+}
+
+// Appends a new QuantileState column to *quantile_state_cols and fills it with
+// rows_value values; row i holds a QuantileState that received add_value(0..i).
+//
+// quantile_state_cols     : destination vector; the data is written into element
+//                           [0], so callers are expected to pass an empty vector.
+// datetype_quantile_state : data type used to create the column.
+// rows_value              : number of rows to create.
+// data_strs               : kept for signature compatibility; not used here.
+void insert_data_quantile_state(MutableColumns* quantile_state_cols,
+                                DataTypePtr datetype_quantile_state, int rows_value,
+                                std::vector<std::string>* data_strs = nullptr) {
+    auto column_quantile_state = datetype_quantile_state->create_column();
+
+    quantile_state_cols->push_back(column_quantile_state->get_ptr());
+    auto& data =
+            assert_cast<ColumnQuantileState*>((*quantile_state_cols)[0].get())->get_data();
+    for (size_t i = 0; i != rows_value; ++i) {
+        QuantileState quantile_state_value;
+        for (size_t j = 0; j <= i; ++j) {
+            quantile_state_value.add_value(j);
+        }
+        // NOTE(review): the serialized bytes are discarded; this call only exercises
+        // serialize() into a get_serialized_size() buffer. Kept as in the original
+        // in case it is intentional -- confirm.
+        std::string memory_buffer(quantile_state_value.get_serialized_size(), '0');
+        quantile_state_value.serialize(reinterpret_cast<uint8_t*>(memory_buffer.data()));
+        data.emplace_back(std::move(quantile_state_value));
+    }
+    std::cout << "finish insert data" << std::endl;
+}
+
+// Exercises to_string_batch and both to_string overloads of the QuantileState data
+// type. Only the row count and the fixed "QuantileState()" text are checked.
+TEST_P(DataTypeQuantileStateTest, FromAndToStringTest) {
+    MutableColumns source_cols;
+    std::vector<std::string> value_strs;
+    insert_data_quantile_state(&source_cols, datatype_quantile_state, rows_value, &value_strs);
+
+    {
+        // to_string_batch | from_string
+        auto batch_out = ColumnString::create();
+        datatype_quantile_state->to_string_batch(*source_cols[0]->get_ptr(), *batch_out);
+        ASSERT_EQ(batch_out->size(), source_cols[0]->get_ptr()->size());
+        std::cout << "finish to_string_batch | from_string not support test" << std::endl;
+    }
+
+    {
+        // to_string | from_string
+        auto row_out = ColumnString::create();
+        row_out->reserve(source_cols[0]->get_ptr()->size());
+        VectorBufferWriter writer(*row_out.get());
+        for (int row = 0; row < source_cols[0]->get_ptr()->size(); ++row) {
+            datatype_quantile_state->to_string(*source_cols[0]->get_ptr(), row, writer);
+            std::string text =
+                    datatype_quantile_state->to_string(*source_cols[0]->get_ptr(), row);
+            writer.commit();
+            // QuantileState to_string is not implemented
+            EXPECT_EQ(text, "QuantileState()");
+        }
+        std::cout << "finish to_string | from_string not support test" << std::endl;
+    }
+}
+
+// Whole-column round trip: serialize() into a buffer sized by
+// get_uncompressed_serialized_bytes(), deserialize() into a fresh column, compare.
+TEST_P(DataTypeQuantileStateTest, SerializeDeserializeTest) {
+    MutableColumns source_cols;
+    insert_data_quantile_state(&source_cols, datatype_quantile_state, rows_value);
+
+    auto* source = assert_cast<ColumnQuantileState*>(source_cols[0].get());
+    auto buffer_size = datatype_quantile_state->get_uncompressed_serialized_bytes(
+            *source, BeExecVersionManager::get_newest_version());
+    auto buffer = std::make_unique<char[]>(buffer_size);
+    auto* write_end = datatype_quantile_state->serialize(
+            *source, buffer.get(), BeExecVersionManager::get_newest_version());
+    // The pointer returned by serialize() is expected to be the end of the buffer.
+    ASSERT_EQ(write_end, buffer.get() + buffer_size);
+
+    auto restored = datatype_quantile_state->create_column();
+    datatype_quantile_state->deserialize(buffer.get(), &restored,
+                                         BeExecVersionManager::get_newest_version());
+    for (size_t row = 0; row != rows_value; ++row) {
+        auto* restored_col = assert_cast<ColumnQuantileState*>(restored.get());
+        // Only the serialized size of each value is compared here.
+        ASSERT_EQ(source->get_data()[row].get_serialized_size(),
+                  restored_col->get_data()[row].get_serialized_size());
+    }
+    helper->serialize_deserialize_assert(source_cols, {datatype_quantile_state});
+    std::cout << "finish serialize deserialize test" << std::endl;
+}
+
+// Stream round trip: each QuantileState is written with serialize_as_stream into a
+// ColumnString and read back with deserialize_as_stream into a second column.
+TEST_P(DataTypeQuantileStateTest, SerializeDeserializeAsStreamTest) {
+    MutableColumns source_cols;
+    insert_data_quantile_state(&source_cols, datatype_quantile_state, rows_value);
+
+    auto stream_col = ColumnString::create();
+    VectorBufferWriter writer(*stream_col.get());
+    auto* expected = assert_cast<ColumnQuantileState*>(source_cols[0].get());
+    auto decoded_holder = datatype_quantile_state->create_column();
+    auto* decoded = assert_cast<ColumnQuantileState*>(decoded_holder.get());
+    // Pre-size the destination so get_element(row) addresses an existing slot.
+    decoded->resize(rows_value);
+    for (size_t row = 0; row != rows_value; ++row) {
+        DataTypeQuantileState::serialize_as_stream(expected->get_element(row), writer);
+        // After the commit the test reads row `row` of stream_col back.
+        writer.commit();
+        BufferReadable reader(stream_col->get_data_at(row));
+        DataTypeQuantileState::deserialize_as_stream(decoded->get_element(row), reader);
+        // Only the serialized size of each value is compared here.
+        ASSERT_EQ(expected->get_data()[row].get_serialized_size(),
+                  decoded->get_data()[row].get_serialized_size());
+    }
+    std::cout << "finish serialize deserialize as stream test" << std::endl;
+}
+
+INSTANTIATE_TEST_SUITE_P(Params, DataTypeQuantileStateTest,
::testing::Values(0, 1, 100, 1000)); // each value becomes rows_value (via GetParam() in SetUp) for one run of every test
+} // namespace doris::vectorized
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]