This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8be13b0e2db [Refactor](block) Refactor interface of shrink column
(#44046)
8be13b0e2db is described below
commit 8be13b0e2db1c589414b8a4f046a4c102093b123
Author: zclllhhjj <[email protected]>
AuthorDate: Tue Nov 19 19:24:00 2024 +0800
[Refactor](block) Refactor interface of shrink column (#44046)
Refactor interface of shrink column
---
be/src/vec/columns/column.h | 13 ++------
be/src/vec/columns/column_array.cpp | 12 ++------
be/src/vec/columns/column_array.h | 3 +-
be/src/vec/columns/column_map.cpp | 24 ++-------------
be/src/vec/columns/column_map.h | 3 +-
be/src/vec/columns/column_nullable.cpp | 13 ++------
be/src/vec/columns/column_nullable.h | 4 +--
be/src/vec/columns/column_object.h | 6 ----
be/src/vec/columns/column_string.cpp | 31 ++++++++++++-------
be/src/vec/columns/column_string.h | 4 +--
be/src/vec/columns/column_struct.cpp | 24 ++-------------
be/src/vec/columns/column_struct.h | 3 +-
be/src/vec/core/block.cpp | 2 +-
be/test/vec/columns/column_string_test.cpp | 48 ++++++++++++++++++++++++++++++
14 files changed, 90 insertions(+), 100 deletions(-)
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index d45757b329c..975827bd13e 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -126,16 +126,9 @@ public:
return nullptr;
}
- // shrink the end zeros for CHAR type or ARRAY<CHAR> type
- virtual MutablePtr get_shrinked_column() {
- throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
- "Method get_shrinked_column is not supported
for " + get_name());
- return nullptr;
- }
-
- // check the column whether could shrinked
- // now support only in char type, or the nested type in complex type:
array{char}, struct{char}, map{char}
- virtual bool could_shrinked_column() { return false; }
+ // shrink the end zeros for ColumnStr(also for who has it nested). so nest
column will call it for all nested.
+ // for non-str col, will reach here(do nothing). only ColumnStr will
really shrink itself.
+ virtual void shrink_padding_chars() {}
/// Some columns may require finalization before using of other operations.
virtual void finalize() {}
diff --git a/be/src/vec/columns/column_array.cpp
b/be/src/vec/columns/column_array.cpp
index e66e016381e..bd4464e2caf 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -79,16 +79,8 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) :
data(std::move(nest
offsets = ColumnOffsets::create();
}
-bool ColumnArray::could_shrinked_column() {
- return data->could_shrinked_column();
-}
-
-MutableColumnPtr ColumnArray::get_shrinked_column() {
- if (could_shrinked_column()) {
- return ColumnArray::create(data->get_shrinked_column(),
offsets->assume_mutable());
- } else {
- return ColumnArray::create(data->assume_mutable(),
offsets->assume_mutable());
- }
+void ColumnArray::shrink_padding_chars() {
+ data->shrink_padding_chars();
}
std::string ColumnArray::get_name() const {
diff --git a/be/src/vec/columns/column_array.h
b/be/src/vec/columns/column_array.h
index a3c79ffaa02..4dbc8e91e52 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -112,8 +112,7 @@ public:
return Base::create(std::forward<Args>(args)...);
}
- MutableColumnPtr get_shrinked_column() override;
- bool could_shrinked_column() override;
+ void shrink_padding_chars() override;
/** On the index i there is an offset to the beginning of the i + 1 -th
element. */
using ColumnOffsets = ColumnVector<Offset64>;
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index 85964ca967b..eb3b431a229 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -502,27 +502,9 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets)
const {
return res;
}
-bool ColumnMap::could_shrinked_column() {
- return keys_column->could_shrinked_column() ||
values_column->could_shrinked_column();
-}
-
-MutableColumnPtr ColumnMap::get_shrinked_column() {
- MutableColumns new_columns(2);
-
- if (keys_column->could_shrinked_column()) {
- new_columns[0] = keys_column->get_shrinked_column();
- } else {
- new_columns[0] = keys_column->get_ptr();
- }
-
- if (values_column->could_shrinked_column()) {
- new_columns[1] = values_column->get_shrinked_column();
- } else {
- new_columns[1] = values_column->get_ptr();
- }
-
- return ColumnMap::create(new_columns[0]->assume_mutable(),
new_columns[1]->assume_mutable(),
- offsets_column->assume_mutable());
+void ColumnMap::shrink_padding_chars() {
+ keys_column->shrink_padding_chars();
+ values_column->shrink_padding_chars();
}
void ColumnMap::reserve(size_t n) {
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 08eee11c436..ae482a2d4e0 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -118,8 +118,7 @@ public:
const char* deserialize_and_insert_from_arena(const char* pos) override;
void update_hash_with_value(size_t n, SipHash& hash) const override;
- MutableColumnPtr get_shrinked_column() override;
- bool could_shrinked_column() override;
+ void shrink_padding_chars() override;
ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const
override;
size_t filter(const Filter& filter) override;
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index 8fffe1dfe65..493aabdae31 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -51,17 +51,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&&
nested_column_, MutableColumnP
_need_update_has_null = true;
}
-bool ColumnNullable::could_shrinked_column() {
- return get_nested_column_ptr()->could_shrinked_column();
-}
-
-MutableColumnPtr ColumnNullable::get_shrinked_column() {
- if (could_shrinked_column()) {
- return
ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
- get_null_map_column_ptr());
- } else {
- return ColumnNullable::create(get_nested_column_ptr(),
get_null_map_column_ptr());
- }
+void ColumnNullable::shrink_padding_chars() {
+ get_nested_column_ptr()->shrink_padding_chars();
}
void ColumnNullable::update_xxHash_with_value(size_t start, size_t end,
uint64_t& hash,
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 35787cd52d8..6096fd4b669 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -143,8 +143,8 @@ public:
return Base::create(std::forward<Args>(args)...);
}
- MutableColumnPtr get_shrinked_column() override;
- bool could_shrinked_column() override;
+ void shrink_padding_chars() override;
+
bool is_variable_length() const override { return
nested_column->is_variable_length(); }
std::string get_name() const override { return "Nullable(" +
nested_column->get_name() + ")"; }
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index d7dd0a88da8..244692e9ddd 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -446,12 +446,6 @@ public:
void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
const uint8_t* __restrict null_data) const
override;
- // Not implemented
- MutableColumnPtr get_shrinked_column() override {
- throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
- "get_shrinked_column" + get_name());
- }
-
Int64 get_int(size_t /*n*/) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_int" +
get_name());
}
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 6eb3e45b2e0..3caa194551b 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
+#include <cstring>
#include "util/memcpy_inlined.h"
#include "util/simd/bits.h"
@@ -81,16 +82,26 @@ MutableColumnPtr ColumnStr<T>::clone_resized(size_t
to_size) const {
}
template <typename T>
-MutableColumnPtr ColumnStr<T>::get_shrinked_column() {
- auto shrinked_column = ColumnStr<T>::create();
- shrinked_column->get_offsets().reserve(offsets.size());
- shrinked_column->get_chars().reserve(chars.size());
- for (int i = 0; i < size(); i++) {
- StringRef str = get_data_at(i);
- reinterpret_cast<ColumnStr<T>*>(shrinked_column.get())
- ->insert_data(str.data, strnlen(str.data, str.size));
- }
- return shrinked_column;
+void ColumnStr<T>::shrink_padding_chars() {
+ if (size() == 0) {
+ return;
+ }
+ char* data = reinterpret_cast<char*>(chars.data());
+ auto* offset = offsets.data();
+ size_t size = offsets.size();
+
+ // deal the 0-th element. no need to move.
+ auto next_start = offset[0];
+ offset[0] = strnlen(data, size_at(0));
+ for (size_t i = 1; i < size; i++) {
+ // get the i-th length and whole move it to cover the last's trailing
void
+ auto length = strnlen(data + next_start, offset[i] - next_start);
+ memmove(data + offset[i - 1], data + next_start, length);
+ // offset i will be changed. so save the old value for (i+1)-th to get
its length.
+ next_start = offset[i];
+ offset[i] = offset[i - 1] + length;
+ }
+ chars.resize_fill(offsets.back()); // just call it to shrink memory here.
no possible to expand.
}
// This method is only called by MutableBlock::merge_ignore_overflow
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index 0881c887288..a72a3ec5cdc 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -85,6 +85,7 @@ private:
/// For convenience, every string ends with terminating zero byte. Note
that strings could contain zero bytes in the middle.
Chars chars;
+ // Start position of i-th element.
size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }
/// Size of i-th element, including terminating zero.
@@ -117,8 +118,7 @@ public:
MutableColumnPtr clone_resized(size_t to_size) const override;
- MutableColumnPtr get_shrinked_column() override;
- bool could_shrinked_column() override { return true; }
+ void shrink_padding_chars() override;
Field operator[](size_t n) const override {
assert(n < size());
diff --git a/be/src/vec/columns/column_struct.cpp
b/be/src/vec/columns/column_struct.cpp
index ec0c5e6a687..3a238a09c0d 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -313,28 +313,10 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets)
const {
return ColumnStruct::create(new_columns);
}
-bool ColumnStruct::could_shrinked_column() {
- const size_t tuple_size = columns.size();
- for (size_t i = 0; i < tuple_size; ++i) {
- if (columns[i]->could_shrinked_column()) {
- return true;
- }
- }
- return false;
-}
-
-MutableColumnPtr ColumnStruct::get_shrinked_column() {
- const size_t tuple_size = columns.size();
- MutableColumns new_columns(tuple_size);
-
- for (size_t i = 0; i < tuple_size; ++i) {
- if (columns[i]->could_shrinked_column()) {
- new_columns[i] = columns[i]->get_shrinked_column();
- } else {
- new_columns[i] = columns[i]->get_ptr();
- }
+void ColumnStruct::shrink_padding_chars() {
+ for (auto& column : columns) {
+ column->shrink_padding_chars();
}
- return ColumnStruct::create(std::move(new_columns));
}
void ColumnStruct::reserve(size_t n) {
diff --git a/be/src/vec/columns/column_struct.h
b/be/src/vec/columns/column_struct.h
index 15836604792..e9f8014d9db 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -151,8 +151,7 @@ public:
int compare_at(size_t n, size_t m, const IColumn& rhs_, int
nan_direction_hint) const override;
- MutableColumnPtr get_shrinked_column() override;
- bool could_shrinked_column() override;
+ void shrink_padding_chars() override;
void reserve(size_t n) override;
void resize(size_t n) override;
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 11075335fb1..b15b83cf77e 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -1215,7 +1215,7 @@ void Block::shrink_char_type_column_suffix_zero(const
std::vector<size_t>& char_
for (auto idx : char_type_idx) {
if (idx < data.size()) {
auto& col_and_name = this->get_by_position(idx);
- col_and_name.column =
col_and_name.column->assume_mutable()->get_shrinked_column();
+ col_and_name.column->assume_mutable()->shrink_padding_chars();
}
}
}
diff --git a/be/test/vec/columns/column_string_test.cpp
b/be/test/vec/columns/column_string_test.cpp
new file mode 100644
index 00000000000..07195afe1cf
--- /dev/null
+++ b/be/test/vec/columns/column_string_test.cpp
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_string.h"
+
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+
+#include "vec/common/string_ref.h"
+#include "vec/core/types.h"
+
+using namespace doris;
+using namespace doris::vectorized;
+
+TEST(ColumnStringTest, shrink_padding_chars) {
+ ColumnString::MutablePtr col = ColumnString::create();
+ col->insert_data("123\0 ", 7);
+ col->insert_data("456\0xx", 6);
+ col->insert_data("78", 2);
+ col->shrink_padding_chars();
+
+ EXPECT_EQ(col->size(), 3);
+ EXPECT_EQ(col->get_data_at(0), StringRef("123"));
+ EXPECT_EQ(col->get_data_at(0).size, 3);
+ EXPECT_EQ(col->get_data_at(1), StringRef("456"));
+ EXPECT_EQ(col->get_data_at(1).size, 3);
+ EXPECT_EQ(col->get_data_at(2), StringRef("78"));
+ EXPECT_EQ(col->get_data_at(2).size, 2);
+
+ col->insert_data("xyz", 2); // only xy
+
+ EXPECT_EQ(col->size(), 4);
+ EXPECT_EQ(col->get_data_at(3), StringRef("xy"));
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]