This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7fa7efb2e9e [opt](exec) opt const expr exec in set operator (#34823)
7fa7efb2e9e is described below
commit 7fa7efb2e9e639914fce5acc235f60b06603e39d
Author: HappenLee <[email protected]>
AuthorDate: Thu May 16 14:09:39 2024 +0800
[opt](exec) opt const expr exec in set operator (#34823)
---
be/src/pipeline/exec/set_sink_operator.cpp | 30 +++++++++++++++++++-------
be/src/pipeline/exec/set_sink_operator.h | 3 ++-
be/src/vec/exec/vset_operation_node.cpp | 34 ++++++++++++++++++------------
be/src/vec/exec/vset_operation_node.h | 2 +-
4 files changed, 46 insertions(+), 23 deletions(-)
diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp
index ef2f1463e7c..be44d4dbf7d 100644
--- a/be/src/pipeline/exec/set_sink_operator.cpp
+++ b/be/src/pipeline/exec/set_sink_operator.cpp
@@ -86,7 +86,7 @@ Status SetSinkOperatorX<is_intersect>::_process_build_block(
vectorized::materialize_block_inplace(block);
vectorized::ColumnRawPtrs raw_ptrs(_child_exprs.size());
- RETURN_IF_ERROR(_extract_build_column(local_state, block, raw_ptrs));
+ RETURN_IF_ERROR(_extract_build_column(local_state, block, raw_ptrs, rows));
std::visit(
[&](auto&& arg) {
@@ -108,20 +108,34 @@ Status SetSinkOperatorX<is_intersect>::_process_build_block(
template <bool is_intersect>
Status SetSinkOperatorX<is_intersect>::_extract_build_column(
SetSinkLocalState<is_intersect>& local_state, vectorized::Block& block,
- vectorized::ColumnRawPtrs& raw_ptrs) {
+ vectorized::ColumnRawPtrs& raw_ptrs, size_t& rows) {
+ std::vector<int> result_locs(_child_exprs.size(), -1);
+ bool is_all_const = true;
+
+ for (size_t i = 0; i < _child_exprs.size(); ++i) {
+ RETURN_IF_ERROR(_child_exprs[i]->execute(&block, &result_locs[i]));
+ is_all_const &= is_column_const(*block.get_by_position(result_locs[i]).column);
+ }
+ rows = is_all_const ? 1 : rows;
+
for (size_t i = 0; i < _child_exprs.size(); ++i) {
- int result_col_id = -1;
- RETURN_IF_ERROR(_child_exprs[i]->execute(&block, &result_col_id));
+ int result_col_id = result_locs[i];
- block.get_by_position(result_col_id).column =
- block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+ if (is_all_const) {
+ block.get_by_position(result_col_id).column =
+ assert_cast<const vectorized::ColumnConst&>(
+ *block.get_by_position(result_col_id).column)
+ .get_data_column_ptr();
+ } else {
+ block.get_by_position(result_col_id).column =
+ block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+ }
if (local_state._shared_state->build_not_ignore_null[i]) {
block.get_by_position(result_col_id).column =
make_nullable(block.get_by_position(result_col_id).column);
}
- const auto* column = block.get_by_position(result_col_id).column.get();
- raw_ptrs[i] = column;
+ raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
DCHECK_GE(result_col_id, 0);
local_state._shared_state->build_col_idx.insert({result_col_id, i});
}
diff --git a/be/src/pipeline/exec/set_sink_operator.h b/be/src/pipeline/exec/set_sink_operator.h
index 1c579d679ff..09a1fa09e7c 100644
--- a/be/src/pipeline/exec/set_sink_operator.h
+++ b/be/src/pipeline/exec/set_sink_operator.h
@@ -103,7 +103,8 @@ private:
Status _process_build_block(SetSinkLocalState<is_intersect>& local_state,
vectorized::Block& block, RuntimeState* state);
Status _extract_build_column(SetSinkLocalState<is_intersect>& local_state,
- vectorized::Block& block, vectorized::ColumnRawPtrs& raw_ptrs);
+ vectorized::Block& block, vectorized::ColumnRawPtrs& raw_ptrs,
+ size_t& rows);
const int _cur_child_id;
const int _child_quantity;
diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp
index 209e755636e..c207fb18f05 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -24,10 +24,8 @@
#include <ostream>
#include <string>
-#include <type_traits>
#include <utility>
-#include "runtime/define_primitive_type.h"
#include "runtime/runtime_state.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/hash_table/hash_table_set_build.h"
@@ -35,7 +33,6 @@
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/materialize_block.h"
#include "vec/core/types.h"
-#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/exec/join/join_op.h"
#include "vec/exprs/vexpr.h"
@@ -309,9 +306,8 @@ Status VSetOperationNode<is_intersect>::process_build_block(Block& block, Runtim
return Status::OK();
}
- vectorized::materialize_block_inplace(block);
ColumnRawPtrs raw_ptrs(_child_expr_lists[0].size());
- RETURN_IF_ERROR(extract_build_column(block, raw_ptrs));
+ RETURN_IF_ERROR(extract_build_column(block, raw_ptrs, rows));
auto st = Status::OK();
std::visit(
[&](auto&& arg) {
@@ -411,20 +407,32 @@ bool VSetOperationNode<is_intersect>::is_child_finished(int child_id) const {
}
template <bool is_intersect>
-Status VSetOperationNode<is_intersect>::extract_build_column(Block& block,
- ColumnRawPtrs& raw_ptrs) {
+Status VSetOperationNode<is_intersect>::extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs,
+ size_t& rows) {
+ std::vector<int> result_locs(_child_expr_lists[0].size(), -1);
+ bool is_all_const = true;
+
for (size_t i = 0; i < _child_expr_lists[0].size(); ++i) {
- int result_col_id = -1;
- RETURN_IF_ERROR(_child_expr_lists[0][i]->execute(&block, &result_col_id));
+ RETURN_IF_ERROR(_child_expr_lists[0][i]->execute(&block, &result_locs[i]));
+ is_all_const &= is_column_const(*block.get_by_position(result_locs[i]).column);
+ }
+ rows = is_all_const ? 1 : rows;
- block.get_by_position(result_col_id).column =
- block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+ for (size_t i = 0; i < result_locs.size(); ++i) {
+ int result_col_id = result_locs[i];
+ if (is_all_const) {
+ block.get_by_position(result_col_id).column =
+ assert_cast<const ColumnConst&>(*block.get_by_position(result_col_id).column)
+ .get_data_column_ptr();
+ } else {
+ block.get_by_position(result_col_id).column =
+ block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+ }
if (_build_not_ignore_null[i]) {
block.get_by_position(result_col_id).column =
make_nullable(block.get_by_position(result_col_id).column);
}
- const auto* column = block.get_by_position(result_col_id).column.get();
- raw_ptrs[i] = column;
+ raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
DCHECK_GE(result_col_id, 0);
_build_col_idx.insert({result_col_id, i});
}
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index ce5a8eb1dbc..508f8073689 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -88,7 +88,7 @@ private:
void hash_table_init();
Status hash_table_build(RuntimeState* state);
Status process_build_block(Block& block, RuntimeState* state);
- Status extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs);
+ Status extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs, size_t& row_num);
Status extract_probe_column(Block& block, ColumnRawPtrs& raw_ptrs, int child_id);
void refresh_hash_table();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]