This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 33ebe47905 Revert "[refactor](join) improve join node output when
build table rows is 0 (#23713)"
33ebe47905 is described below
commit 33ebe47905c6949d472b0221f84c9190e5ab7abf
Author: Kang <[email protected]>
AuthorDate: Mon Sep 4 18:04:06 2023 +0800
Revert "[refactor](join) improve join node output when build table rows is 0 (#23713)"
This reverts commit 32f5749dcc2279adfdf75f2e96c6e920ddd8dd0f.
---
be/src/vec/exec/join/vhash_join_node.cpp | 47 ++------------------------------
be/src/vec/exec/join/vhash_join_node.h | 8 ------
be/src/vec/exec/join/vjoin_node_base.h | 7 +----
be/src/vec/utils/util.hpp | 10 -------
4 files changed, 3 insertions(+), 69 deletions(-)
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 67af01ba7b..7d9d3ebd89 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -494,7 +494,7 @@ Status HashJoinNode::prepare(RuntimeState* state) {
// right table data types
_right_table_data_types =
VectorizedUtils::get_data_types(child(1)->row_desc());
_left_table_data_types =
VectorizedUtils::get_data_types(child(0)->row_desc());
- _right_table_column_names =
VectorizedUtils::get_column_names(child(1)->row_desc());
+
// Hash Table Init
_hash_table_init(state);
_construct_mutable_join_block();
@@ -534,48 +534,6 @@ Status HashJoinNode::pull(doris::RuntimeState* state,
vectorized::Block* output_
*eos = true;
return Status::OK();
}
- //TODO: this short circuit maybe could refactor, no need to check at here.
- if (_short_circuit_for_probe_and_additional_data) {
- // when build table rows is 0 and not have other_join_conjunct and
join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN
- // we could get the result is probe table + null-column(if need output)
- // If we use a short-circuit strategy, should return block directly by
add additional null data.
- auto block_rows = _probe_block.rows();
- if (_probe_eos && block_rows == 0) {
- *eos = _probe_eos;
- return Status::OK();
- }
-
- Block temp_block;
- //get probe side output column
- for (int i = 0; i < _left_output_slot_flags.size(); ++i) {
- temp_block.insert(_probe_block.get_by_position(i));
- }
-
- //create build side null column, if need output
- for (int i = 0;
- (_join_op != TJoinOp::LEFT_ANTI_JOIN) && i <
_right_output_slot_flags.size(); ++i) {
- auto type = remove_nullable(_right_table_data_types[i]);
- auto column = type->create_column();
- column->resize(block_rows);
- auto null_map_column = ColumnVector<UInt8>::create(block_rows, 1);
- auto nullable_column =
- ColumnNullable::create(std::move(column),
std::move(null_map_column));
- temp_block.insert({std::move(nullable_column), make_nullable(type),
- _right_table_column_names[i]});
- }
-
- {
- SCOPED_TIMER(_join_filter_timer);
- RETURN_IF_ERROR(
- VExprContext::filter_block(_conjuncts, &temp_block,
temp_block.columns()));
- }
-
- RETURN_IF_ERROR(_build_output_block(&temp_block, output_block, false));
- temp_block.clear();
- release_block_memory(_probe_block);
- reached_limit(output_block, eos);
- return Status::OK();
- }
_join_block.clear_column_data();
MutableBlock mutable_join_block(&_join_block);
@@ -650,7 +608,6 @@ Status HashJoinNode::pull(doris::RuntimeState* state,
vectorized::Block* output_
if (!st) {
return st;
}
-
if (_is_outer_join) {
_add_tuple_is_null_column(&temp_block);
}
@@ -664,8 +621,8 @@ Status HashJoinNode::pull(doris::RuntimeState* state,
vectorized::Block* output_
// Here make _join_block release the columns' ptr
_join_block.set_columns(_join_block.clone_empty_columns());
mutable_join_block.clear();
- RETURN_IF_ERROR(_build_output_block(&temp_block, output_block, false));
+ RETURN_IF_ERROR(_build_output_block(&temp_block, output_block, false));
_reset_tuple_is_null_column();
reached_limit(output_block, eos);
return Status::OK();
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index b7cb5182e1..096a9148cc 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -271,13 +271,6 @@ private:
(_build_blocks->empty() && _join_op ==
TJoinOp::RIGHT_OUTER_JOIN) ||
(_build_blocks->empty() && _join_op ==
TJoinOp::RIGHT_SEMI_JOIN) ||
(_build_blocks->empty() && _join_op ==
TJoinOp::RIGHT_ANTI_JOIN);
-
- //when build table rows is 0 and not have other_join_conjunct and not
_is_mark_join and join type is one of
LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN
- //we could get the result is probe table + null-column(if need output)
- _short_circuit_for_probe_and_additional_data =
- (_build_blocks->empty() && !_have_other_join_conjunct &&
!_is_mark_join) &&
- (_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op ==
TJoinOp::FULL_OUTER_JOIN ||
- _join_op == TJoinOp::LEFT_ANTI_JOIN);
}
// probe expr
@@ -298,7 +291,6 @@ private:
DataTypes _right_table_data_types;
DataTypes _left_table_data_types;
- std::vector<std::string> _right_table_column_names;
RuntimeProfile::Counter* _build_table_timer;
RuntimeProfile::Counter* _build_expr_call_timer;
diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h
index 62289044a8..8756c24d20 100644
--- a/be/src/vec/exec/join/vjoin_node_base.h
+++ b/be/src/vec/exec/join/vjoin_node_base.h
@@ -97,10 +97,7 @@ protected:
// Materialize build relation. For HashJoin, it will build a hash table
while a list of build blocks for NLJoin.
virtual Status _materialize_build_side(RuntimeState* state) = 0;
- virtual void _init_short_circuit_for_probe() {
- _short_circuit_for_probe = false;
- _short_circuit_for_probe_and_additional_data = false;
- }
+ virtual void _init_short_circuit_for_probe() { _short_circuit_for_probe =
false; }
TJoinOp::type _join_op;
JoinOpVariants _join_op_variants;
@@ -127,8 +124,6 @@ protected:
// 2. build side rows is empty, Join op is: inner join/right outer
join/left semi/right semi/right anti
bool _short_circuit_for_probe = false;
- // for some join, when build side rows is empty, we could return directly
by add some additional null data in probe table.
- bool _short_circuit_for_probe_and_additional_data = false;
std::unique_ptr<RowDescriptor> _output_row_desc;
std::unique_ptr<RowDescriptor> _intermediate_row_desc;
// output expr
diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp
index 440bbff153..a57e5c1705 100644
--- a/be/src/vec/utils/util.hpp
+++ b/be/src/vec/utils/util.hpp
@@ -123,16 +123,6 @@ public:
return data_types;
}
- static std::vector<std::string> get_column_names(const RowDescriptor&
row_desc) {
- std::vector<std::string> column_names;
- for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
- for (const auto& slot_desc : tuple_desc->slots()) {
- column_names.push_back(slot_desc->col_name());
- }
- }
- return column_names;
- }
-
static bool all_arguments_are_constant(const Block& block, const
ColumnNumbers& args) {
for (const auto& arg : args) {
if (!is_column_const(*block.get_by_position(arg).column)) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]