This is an automated email from the ASF dual-hosted git repository.
Mryange pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2b4e9061c7b [refine](function) avoid unnecessary COW column clones
(#64627)
2b4e9061c7b is described below
commit 2b4e9061c7b949108a605d0ee92bbd2e7dd78bdc
Author: Mryange <[email protected]>
AuthorDate: Tue Jun 23 14:07:57 2026 +0800
[refine](function) avoid unnecessary COW column clones (#64627)
### What problem does this PR solve?
Some function implementations cloned nullable null maps, array offsets,
or pass-through columns even though the result only needs to share
immutable column data. This change reuses those copy-on-write (COW)
subcolumns directly in non-mutating paths and keeps explicit clones for
paths that modify the result data.
### Release note
None
---
be/src/exec/common/util.hpp | 23 ++++++++++++++--------
.../exprs/function/array/function_array_element.h | 5 ++---
.../function/array/function_array_enumerate.cpp | 3 +--
.../array/function_array_enumerate_uniq.cpp | 5 ++---
.../exprs/function/array/function_array_exists.cpp | 12 ++++-------
.../exprs/function/array/function_array_sortby.cpp | 13 ++++++------
be/src/exprs/function/cast/cast_to_variant.h | 4 +---
be/src/exprs/function/function.cpp | 2 +-
.../function_date_or_datetime_to_string.cpp | 6 ++----
be/src/exprs/function/function_nullables.cpp | 6 +++---
be/src/exprs/function/function_variant_element.cpp | 3 +--
be/src/exprs/runtime_filter_expr.cpp | 2 +-
be/src/exprs/vtopn_pred.h | 2 +-
.../function/cast/function_variant_cast_test.cpp | 12 ++++++-----
14 files changed, 47 insertions(+), 51 deletions(-)
diff --git a/be/src/exec/common/util.hpp b/be/src/exec/common/util.hpp
index 013d0a4f036..a975c0df900 100644
--- a/be/src/exec/common/util.hpp
+++ b/be/src/exec/common/util.hpp
@@ -243,29 +243,36 @@ inline ColumnPtr create_always_true_column(size_t size,
bool is_nullable) {
}
// change null element to true element
-inline void change_null_to_true(MutableColumnPtr column, ColumnPtr argument =
nullptr) {
+inline ColumnPtr change_null_to_true(ColumnPtr&& column, const ColumnPtr&
argument = nullptr) {
size_t rows = column->size();
if (is_column_const(*column)) {
- change_null_to_true(
-
assert_cast<ColumnConst*>(column.get())->get_data_column_ptr()->assert_mutable());
- } else if (column->has_null()) {
- auto* nullable = assert_cast<ColumnNullable*>(column.get());
+ auto nested_column = assert_cast<const
ColumnConst*>(column.get())->get_data_column_ptr();
+ auto nested = change_null_to_true(std::move(nested_column));
+ return ColumnConst::create(std::move(nested), rows);
+ }
+
+ auto mutable_column = IColumn::mutate(std::move(column));
+ if (auto* nullable =
check_and_get_column<ColumnNullable>(*mutable_column)) {
auto* __restrict data =
assert_cast<ColumnUInt8*>(nullable->get_nested_column_ptr().get())
->get_data()
.data();
- const NullMap& null_map = nullable->get_null_map_data();
+ NullMap& null_map = nullable->get_null_map_data();
for (size_t i = 0; i < rows; ++i) {
data[i] |= null_map[i];
}
nullable->fill_false_to_nullmap(rows);
- } else if (argument && argument->has_null()) {
+ return mutable_column;
+ }
+
+ if (argument && argument->has_null()) {
const auto* __restrict null_map =
assert_cast<const
ColumnNullable*>(argument.get())->get_null_map_data().data();
- auto* __restrict data =
assert_cast<ColumnUInt8*>(column.get())->get_data().data();
+ auto* __restrict data =
assert_cast<ColumnUInt8*>(mutable_column.get())->get_data().data();
for (size_t i = 0; i < rows; ++i) {
data[i] |= null_map[i];
}
}
+ return mutable_column;
}
inline size_t calculate_false_number(ColumnPtr column) {
diff --git a/be/src/exprs/function/array/function_array_element.h
b/be/src/exprs/function/array/function_array_element.h
index 1ea9f6227af..9b93680846b 100644
--- a/be/src/exprs/function/array/function_array_element.h
+++ b/be/src/exprs/function/array/function_array_element.h
@@ -262,9 +262,8 @@ private:
res_null_map[i] |= outer[i];
}
}
- block.replace_by_position(
- result,
ColumnNullable::create(res_nested->clone_resized(input_rows_count),
- std::move(res_null_column)));
+ block.replace_by_position(result,
+ ColumnNullable::create(res_nested,
std::move(res_null_column)));
return Status::OK();
}
diff --git a/be/src/exprs/function/array/function_array_enumerate.cpp
b/be/src/exprs/function/array/function_array_enumerate.cpp
index 7d82da40ddf..7b57df93deb 100644
--- a/be/src/exprs/function/array/function_array_enumerate.cpp
+++ b/be/src/exprs/function/array/function_array_enumerate.cpp
@@ -106,8 +106,7 @@ public:
ColumnPtr res_column =
ColumnArray::create(std::move(nested_column),
array->get_offsets_ptr());
if (const auto* nullable =
check_and_get_column<ColumnNullable>(left_column.get())) {
- res_column = ColumnNullable::create(
- res_column,
nullable->get_null_map_column().clone_resized(nullable->size()));
+ res_column = ColumnNullable::create(res_column,
nullable->get_null_map_column_ptr());
}
block.replace_by_position(result, std::move(res_column));
return Status::OK();
diff --git a/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
b/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
index bff8758352e..c91b3a420b3 100644
--- a/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
+++ b/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
@@ -198,9 +198,8 @@ public:
auto left_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
if (const auto* nullable =
check_and_get_column<ColumnNullable>(left_column.get())) {
- res_column = ColumnNullable::create(
- res_column,
-
nullable->get_null_map_column().clone_resized(nullable->size()));
+ res_column =
+ ColumnNullable::create(res_column,
nullable->get_null_map_column_ptr());
}
}
diff --git a/be/src/exprs/function/array/function_array_exists.cpp
b/be/src/exprs/function/array/function_array_exists.cpp
index ffa74d24e8d..3ae69836270 100644
--- a/be/src/exprs/function/array/function_array_exists.cpp
+++ b/be/src/exprs/function/array/function_array_exists.cpp
@@ -68,19 +68,16 @@ public:
const auto first_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const ColumnArray& first_col_array = assert_cast<const
ColumnArray&>(*first_column);
- const auto& first_off_data = first_col_array.get_offsets_column();
-
const auto& nested_nullable_column =
assert_cast<const
ColumnNullable&>(*first_col_array.get_data_ptr());
const auto nested_column =
nested_nullable_column.get_nested_column_ptr();
const size_t nested_column_size = nested_column->size();
- MutableColumnPtr result_null_map =
-
nested_nullable_column.get_null_map_column_ptr()->clone_resized(nested_column_size);
+ ColumnPtr result_null_map =
nested_nullable_column.get_null_map_column_ptr();
// 2. compute result
auto result_column = ColumnUInt8::create(nested_column_size, 0);
auto* __restrict result_column_data = result_column->get_data().data();
- MutableColumnPtr result_offset_column =
first_off_data.clone_resized(first_off_data.size());
+ ColumnPtr result_offset_column = first_col_array.get_offsets_ptr();
const auto* __restrict nested_column_data =
assert_cast<const
ColumnUInt8&>(*nested_column).get_data().data();
@@ -89,9 +86,8 @@ public:
}
ColumnPtr result_nullalble_column =
- ColumnNullable::create(std::move(result_column),
std::move(result_null_map));
- ColumnPtr column_array =
- ColumnArray::create(result_nullalble_column,
std::move(result_offset_column));
+ ColumnNullable::create(result_column->get_ptr(),
result_null_map);
+ ColumnPtr column_array = ColumnArray::create(result_nullalble_column,
result_offset_column);
block.replace_by_position(result, column_array);
return Status::OK();
}
diff --git a/be/src/exprs/function/array/function_array_sortby.cpp
b/be/src/exprs/function/array/function_array_sortby.cpp
index 8b8f3589d96..ad8cfb09697 100644
--- a/be/src/exprs/function/array/function_array_sortby.cpp
+++ b/be/src/exprs/function/array/function_array_sortby.cpp
@@ -89,13 +89,12 @@ public:
assert_cast<const
ColumnNullable&>(key_column_array.get_data());
auto result_data_column = src_nested_nullable_column.clone_empty();
- auto result_offset_column =
-
src_column_array.get_offsets_column().clone_resized(input_rows_count);
- MutableColumnPtr result_nullmap = nullptr;
+ ColumnPtr result_offset_column = src_column_array.get_offsets_ptr();
+ ColumnPtr result_nullmap = nullptr;
const ColumnUInt8::Container* src_null_map_data = nullptr;
if (argument_nullmap[0]) {
const auto& src_column_nullmap = assert_cast<const
ColumnUInt8&>(*argument_nullmap[0]);
- result_nullmap =
src_column_nullmap.clone_resized(input_rows_count);
+ result_nullmap = argument_nullmap[0];
src_null_map_data = &(src_column_nullmap.get_data());
}
const ColumnUInt8::Container* key_null_map_data = nullptr;
@@ -151,11 +150,11 @@ public:
block.replace_by_position(
result,
ColumnNullable::create(ColumnArray::create(std::move(result_data_column),
-
std::move(result_offset_column)),
- std::move(result_nullmap)));
+
result_offset_column),
+ result_nullmap));
} else {
block.replace_by_position(result,
ColumnArray::create(std::move(result_data_column),
-
std::move(result_offset_column)));
+
result_offset_column));
}
return Status::OK();
}
diff --git a/be/src/exprs/function/cast/cast_to_variant.h
b/be/src/exprs/function/cast/cast_to_variant.h
index 0efc29047b1..bfef6b2918a 100644
--- a/be/src/exprs/function/cast/cast_to_variant.h
+++ b/be/src/exprs/function/cast/cast_to_variant.h
@@ -46,10 +46,8 @@ inline Status cast_from_variant_impl(FunctionContext*
context, Block& block,
auto finalized_variant = variant->clone_finalized();
variant = assert_cast<const ColumnVariant*>(finalized_variant.get());
if (nullable != nullptr) {
- auto cloned_null_map =
-
nullable->get_null_map_column_ptr()->clone_resized(input_rows_count);
finalized_input_column =
ColumnNullable::create(std::move(finalized_variant),
-
std::move(cloned_null_map));
+
nullable->get_null_map_column_ptr());
} else {
finalized_input_column = std::move(finalized_variant);
}
diff --git a/be/src/exprs/function/function.cpp
b/be/src/exprs/function/function.cpp
index 590eb638292..6d33b1caa4a 100644
--- a/be/src/exprs/function/function.cpp
+++ b/be/src/exprs/function/function.cpp
@@ -62,7 +62,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block&
block, const Colum
if (auto nullable = cast_to_column<ColumnNullable>(elem.column);
nullable->has_null()) {
const ColumnPtr& null_map_column =
nullable->get_null_map_column_ptr();
if (!result_null_map_column) { // NOLINT(bugprone-use-after-move)
- result_null_map_column =
null_map_column->clone_resized(input_rows_count);
+ result_null_map_column = null_map_column;
continue;
}
diff --git a/be/src/exprs/function/function_date_or_datetime_to_string.cpp
b/be/src/exprs/function/function_date_or_datetime_to_string.cpp
index d808d806307..c7b240df961 100644
--- a/be/src/exprs/function/function_date_or_datetime_to_string.cpp
+++ b/be/src/exprs/function/function_date_or_datetime_to_string.cpp
@@ -166,10 +166,8 @@ public:
if (null_map) {
const auto* nullable_col = assert_cast<const
ColumnNullable*>(source_col.get());
block.replace_by_position(
- result,
- ColumnNullable::create(std::move(col_res),
-
nullable_col->get_null_map_column_ptr()->clone_resized(
- input_rows_count)));
+ result, ColumnNullable::create(std::move(col_res),
+
nullable_col->get_null_map_column_ptr()));
} else {
block.replace_by_position(result, std::move(col_res));
}
diff --git a/be/src/exprs/function/function_nullables.cpp
b/be/src/exprs/function/function_nullables.cpp
index 902d5ebe2c1..99a9a427195 100644
--- a/be/src/exprs/function/function_nullables.cpp
+++ b/be/src/exprs/function/function_nullables.cpp
@@ -59,7 +59,7 @@ public:
block.replace_by_position(
result, ColumnNullable::create(col,
ColumnBool::create(input_rows_count, 0)));
} else { // column is ColumnNullable
- block.replace_by_position(result,
col->clone_resized(input_rows_count));
+ block.replace_by_position(result, col);
}
return Status::OK();
}
@@ -93,9 +93,9 @@ public:
data.name);
}
const ColumnPtr& nest_col = col_null->get_nested_column_ptr();
- block.replace_by_position(result,
nest_col->clone_resized(nest_col->size()));
+ block.replace_by_position(result, nest_col);
} else {
- block.replace_by_position(result,
data.column->clone_resized(input_rows_count));
+ block.replace_by_position(result, data.column);
}
return Status::OK();
}
diff --git a/be/src/exprs/function/function_variant_element.cpp
b/be/src/exprs/function/function_variant_element.cpp
index 012f11e80c9..d045eaa1126 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -92,8 +92,7 @@ public:
}
if (var.is_scalar_variant() && is_column_nullable(*var.get_root())) {
const auto* nullable = assert_cast<const
ColumnNullable*>(var.get_root().get());
- return ColumnNullable::create(
- col,
nullable->get_null_map_column_ptr()->clone_resized(col->size()));
+ return ColumnNullable::create(col,
nullable->get_null_map_column_ptr());
}
return make_nullable(col);
}
diff --git a/be/src/exprs/runtime_filter_expr.cpp
b/be/src/exprs/runtime_filter_expr.cpp
index e335e8ed4da..8544c809206 100644
--- a/be/src/exprs/runtime_filter_expr.cpp
+++ b/be/src/exprs/runtime_filter_expr.cpp
@@ -121,7 +121,7 @@ Status RuntimeFilterExpr::execute_filter(VExprContext*
context, const Block* blo
// bloom filter will handle null aware inside itself
if (_null_aware && TExprNodeType::BLOOM_PRED != node_type()) {
DCHECK(arg_column);
- change_null_to_true(filter_column->assert_mutable(), arg_column);
+ filter_column = change_null_to_true(std::move(filter_column),
arg_column);
}
if (const auto* const_column =
check_and_get_column<ColumnConst>(*filter_column)) {
diff --git a/be/src/exprs/vtopn_pred.h b/be/src/exprs/vtopn_pred.h
index 526787bccc6..94887588f53 100644
--- a/be/src/exprs/vtopn_pred.h
+++ b/be/src/exprs/vtopn_pred.h
@@ -118,7 +118,7 @@ public:
result_column =
std::move(temp_block.get_by_position(num_columns_without_result).column);
if (is_nullable() && _predicate->nulls_first()) {
// null values are always not filtered
- change_null_to_true(result_column->assert_mutable());
+ result_column = change_null_to_true(std::move(result_column));
}
DCHECK_EQ(result_column->size(), count);
return Status::OK();
diff --git a/be/test/exprs/function/cast/function_variant_cast_test.cpp
b/be/test/exprs/function/cast/function_variant_cast_test.cpp
index 51034ad6e03..7576bc6cab9 100644
--- a/be/test/exprs/function/cast/function_variant_cast_test.cpp
+++ b/be/test/exprs/function/cast/function_variant_cast_test.cpp
@@ -523,15 +523,15 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
"{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}");
}
- // Test case 5: nullable source null-map is preserved after the nested
string cast is limited
- // to input_rows_count.
+ // Test case 5: nullable source null-map is preserved.
{
auto variant_col = construct_basic_varint_column();
variant_col->finalize();
- auto null_map = ColumnUInt8::create(variant_col->size(), 0);
+ auto single_variant_col = variant_col->cut(0, 1);
+ auto null_map = ColumnUInt8::create(single_variant_col->size(), 0);
null_map->get_data()[0] = 1;
auto nullable_variant_col =
- ColumnNullable::create(std::move(variant_col),
std::move(null_map));
+ ColumnNullable::create(std::move(single_variant_col),
std::move(null_map));
auto nullable_string_type =
make_nullable(std::make_shared<DataTypeString>());
auto variant_type = std::make_shared<DataTypeVariant>();
@@ -550,7 +550,9 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
block.insert({nullptr, nullable_string_type, "result"});
RuntimeState state;
auto ctx = FunctionContext::create_context(&state, {}, {});
- ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
+ nullable_variant_col->size())
+ .ok());
auto result_col = block.get_by_position(result_column).column;
ASSERT_NE(result_col.get(), nullptr);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]