This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 83ea4ea984 [refactor](bitmap) bitmap serialize and deserialize
refactor (#11921)
83ea4ea984 is described below
commit 83ea4ea984a462a2c7e1f8915c1a5ad36f21cd30
Author: camby <[email protected]>
AuthorDate: Mon Aug 22 08:52:20 2022 +0800
[refactor](bitmap) bitmap serialize and deserialize refactor (#11921)
Co-authored-by: cambyzju <[email protected]>
---
be/src/vec/data_types/data_type_bitmap.cpp | 40 ++++++++++-----------
be/test/vec/core/column_complex_test.cpp | 56 ++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+), 22 deletions(-)
diff --git a/be/src/vec/data_types/data_type_bitmap.cpp
b/be/src/vec/data_types/data_type_bitmap.cpp
index cb1a2f2e45..5c49f9c65c 100644
--- a/be/src/vec/data_types/data_type_bitmap.cpp
+++ b/be/src/vec/data_types/data_type_bitmap.cpp
@@ -45,24 +45,22 @@ char* DataTypeBitMap::serialize(const IColumn& column,
char* buf) const {
auto& data_column = assert_cast<const ColumnBitmap&>(*ptr);
// serialize the bitmap size array, row num saves at index 0
- const auto row_num = column.size();
- size_t bitmap_size_array[row_num + 1];
- bitmap_size_array[0] = row_num;
- for (size_t i = 0; i < row_num; ++i) {
+ size_t* meta_ptr = (size_t*)buf;
+ meta_ptr[0] = column.size();
+ for (size_t i = 0; i < meta_ptr[0]; ++i) {
auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
- bitmap_size_array[i + 1] = bitmap.getSizeInBytes();
+ meta_ptr[i + 1] = bitmap.getSizeInBytes();
}
- auto allocate_len_size = sizeof(size_t) * (row_num + 1);
- memcpy(buf, bitmap_size_array, allocate_len_size);
- buf += allocate_len_size;
+
// serialize each bitmap
- for (size_t i = 0; i < row_num; ++i) {
+ char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1);
+ for (size_t i = 0; i < meta_ptr[0]; ++i) {
auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
- bitmap.write(buf);
- buf += bitmap_size_array[i + 1];
+ bitmap.write(data_ptr);
+ data_ptr += meta_ptr[i + 1];
}
- return buf;
+ return data_ptr;
}
const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column)
const {
@@ -70,19 +68,17 @@ const char* DataTypeBitMap::deserialize(const char* buf,
IColumn* column) const
auto& data = data_column.get_data();
// deserialize the bitmap size array
- size_t row_num = *reinterpret_cast<const size_t*>(buf);
- buf += sizeof(size_t);
- size_t bitmap_size_array[row_num];
- memcpy(bitmap_size_array, buf, sizeof(size_t) * row_num);
- buf += sizeof(size_t) * row_num;
+ const size_t* meta_ptr = reinterpret_cast<const size_t*>(buf);
+
// deserialize each bitmap
- data.resize(row_num);
- for (int i = 0; i < row_num; ++i) {
- data[i].deserialize(buf);
- buf += bitmap_size_array[i];
+ data.resize(meta_ptr[0]);
+ const char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1);
+ for (size_t i = 0; i < meta_ptr[0]; ++i) {
+ data[i].deserialize(data_ptr);
+ data_ptr += meta_ptr[i + 1];
}
- return buf;
+ return data_ptr;
}
MutableColumnPtr DataTypeBitMap::create_column() const {
diff --git a/be/test/vec/core/column_complex_test.cpp
b/be/test/vec/core/column_complex_test.cpp
index 1eb8cd906d..ce9e4d60f3 100644
--- a/be/test/vec/core/column_complex_test.cpp
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -44,4 +44,60 @@ TEST(ColumnComplexTest, BasicTest) {
TEST(ColumnComplexType, DataTypeBitmapTest) {
std::make_shared<DataTypeBitMap>();
}
+
+class ColumnBitmapTest : public testing::Test {
+public:
+ virtual void SetUp() override {}
+ virtual void TearDown() override {}
+
+ void check_bitmap_column(const IColumn& l, const IColumn& r) {
+ ASSERT_EQ(l.size(), r.size());
+ const auto& l_col = assert_cast<const ColumnBitmap&>(l);
+ const auto& r_col = assert_cast<const ColumnBitmap&>(r);
+ for (size_t i = 0; i < l_col.size(); ++i) {
+ auto& l_bitmap = const_cast<BitmapValue&>(l_col.get_element(i));
+ auto& r_bitmap = const_cast<BitmapValue&>(r_col.get_element(i));
+ ASSERT_EQ(l_bitmap.xor_cardinality(r_bitmap), 0);
+ }
+ }
+
+ void check_serialize_and_deserialize(MutableColumnPtr& col) {
+ auto column = assert_cast<ColumnBitmap*>(col.get());
+ auto size = _bitmap_type.get_uncompressed_serialized_bytes(*column);
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ auto result = _bitmap_type.serialize(*column, buf.get());
+ ASSERT_EQ(result, buf.get() + size);
+
+ auto column2 = _bitmap_type.create_column();
+ _bitmap_type.deserialize(buf.get(), column2.get());
+ check_bitmap_column(*column, *column2.get());
+ }
+
+private:
+ DataTypeBitMap _bitmap_type;
+};
+
+TEST_F(ColumnBitmapTest, SerializeAndDeserialize) {
+ auto column = _bitmap_type.create_column();
+
+ // empty column
+ check_serialize_and_deserialize(column);
+
+ // bitmap with lots of rows
+ const size_t row_size = 20000;
+ auto& data = assert_cast<ColumnBitmap&>(*column.get()).get_data();
+ data.resize(row_size);
+ check_serialize_and_deserialize(column);
+
+ // bitmap with values case 1
+ data[0].add(10);
+ data[0].add(1000000);
+ check_serialize_and_deserialize(column);
+
+ // bitmap with values case 2
+ data[row_size - 1].add(33333);
+ data[row_size - 1].add(0);
+ check_serialize_and_deserialize(column);
+}
+
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]