This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 454e2c0cdc [pick](branch-2.0) cherry-pick two pr from master to 
branch2.0 #23713 #23974 (#24118)
454e2c0cdc is described below

commit 454e2c0cdccfb1c263199cd6d4e78d91d9a3eda8
Author: zhangstar333 <[email protected]>
AuthorDate: Fri Sep 8 23:07:47 2023 +0800

    [pick](branch-2.0) cherry-pick two pr from master to branch2.0 #23713 
#23974 (#24118)
---
 be/src/vec/exec/join/vhash_join_node.cpp | 66 +++++++++++++++++++++++++++-----
 be/src/vec/exec/join/vhash_join_node.h   | 11 ++++++
 be/src/vec/exec/join/vjoin_node_base.h   |  7 +++-
 be/src/vec/utils/util.hpp                | 10 +++++
 4 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/exec/join/vhash_join_node.cpp 
b/be/src/vec/exec/join/vhash_join_node.cpp
index 7d9d3ebd89..284a193779 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -494,7 +494,7 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     // right table data types
     _right_table_data_types = 
VectorizedUtils::get_data_types(child(1)->row_desc());
     _left_table_data_types = 
VectorizedUtils::get_data_types(child(0)->row_desc());
-
+    _right_table_column_names = 
VectorizedUtils::get_column_names(child(1)->row_desc());
     // Hash Table Init
     _hash_table_init(state);
     _construct_mutable_join_block();
@@ -534,6 +534,48 @@ Status HashJoinNode::pull(doris::RuntimeState* state, 
vectorized::Block* output_
         *eos = true;
         return Status::OK();
     }
+    // TODO: this short-circuit check could perhaps be refactored so that it does not need to happen here.
+    if (_short_circuit_for_probe_and_additional_data) {
+        // When the build table has 0 rows, there is no other_join_conjunct, and the join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN,
+        // the result is the probe table plus null columns (if they need to be output).
+        // With this short-circuit strategy, return the block directly after adding the additional null data.
+        auto block_rows = _probe_block.rows();
+        if (_probe_eos && block_rows == 0) {
+            *eos = _probe_eos;
+            return Status::OK();
+        }
+
+        Block temp_block;
+        //get probe side output column
+        for (int i = 0; i < _left_output_slot_flags.size(); ++i) {
+            temp_block.insert(_probe_block.get_by_position(i));
+        }
+
+        // create the build-side null columns, if they need to be output
+        for (int i = 0;
+             (_join_op != TJoinOp::LEFT_ANTI_JOIN) && i < 
_right_output_slot_flags.size(); ++i) {
+            auto type = remove_nullable(_right_table_data_types[i]);
+            auto column = type->create_column();
+            column->resize(block_rows);
+            auto null_map_column = ColumnVector<UInt8>::create(block_rows, 1);
+            auto nullable_column =
+                    ColumnNullable::create(std::move(column), 
std::move(null_map_column));
+            temp_block.insert({std::move(nullable_column), make_nullable(type),
+                               _right_table_column_names[i]});
+        }
+        if (_is_outer_join) {
+            
reinterpret_cast<ColumnUInt8*>(_tuple_is_null_left_flag_column.get())
+                    ->get_data()
+                    .resize_fill(block_rows, 0);
+            
reinterpret_cast<ColumnUInt8*>(_tuple_is_null_right_flag_column.get())
+                    ->get_data()
+                    .resize_fill(block_rows, 1);
+        }
+        RETURN_IF_ERROR(_filter_data_and_build_output(state, output_block, 
eos, &temp_block));
+        temp_block.clear();
+        release_block_memory(_probe_block);
+        return Status::OK();
+    }
     _join_block.clear_column_data();
 
     MutableBlock mutable_join_block(&_join_block);
@@ -608,21 +650,27 @@ Status HashJoinNode::pull(doris::RuntimeState* state, 
vectorized::Block* output_
     if (!st) {
         return st;
     }
+    RETURN_IF_ERROR(_filter_data_and_build_output(state, output_block, eos, 
&temp_block));
+    // Here make _join_block release the columns' ptr
+    _join_block.set_columns(_join_block.clone_empty_columns());
+    mutable_join_block.clear();
+    return Status::OK();
+}
+
+Status HashJoinNode::_filter_data_and_build_output(RuntimeState* state,
+                                                   vectorized::Block* 
output_block, bool* eos,
+                                                   Block* temp_block) {
     if (_is_outer_join) {
-        _add_tuple_is_null_column(&temp_block);
+        _add_tuple_is_null_column(temp_block);
     }
-    auto output_rows = temp_block.rows();
+    auto output_rows = temp_block->rows();
     DCHECK(output_rows <= state->batch_size());
     {
         SCOPED_TIMER(_join_filter_timer);
-        RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, &temp_block, 
temp_block.columns()));
+        RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, temp_block, 
temp_block->columns()));
     }
 
-    // Here make _join_block release the columns' ptr
-    _join_block.set_columns(_join_block.clone_empty_columns());
-    mutable_join_block.clear();
-
-    RETURN_IF_ERROR(_build_output_block(&temp_block, output_block, false));
+    RETURN_IF_ERROR(_build_output_block(temp_block, output_block, false));
     _reset_tuple_is_null_column();
     reached_limit(output_block, eos);
     return Status::OK();
diff --git a/be/src/vec/exec/join/vhash_join_node.h 
b/be/src/vec/exec/join/vhash_join_node.h
index 096a9148cc..639b570934 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -271,6 +271,13 @@ private:
                 (_build_blocks->empty() && _join_op == 
TJoinOp::RIGHT_OUTER_JOIN) ||
                 (_build_blocks->empty() && _join_op == 
TJoinOp::RIGHT_SEMI_JOIN) ||
                 (_build_blocks->empty() && _join_op == 
TJoinOp::RIGHT_ANTI_JOIN);
+
+        // When the build table has 0 rows, there is no other_join_conjunct, it is not a mark join (_is_mark_join), and the join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN,
+        // the result is the probe table plus null columns (if they need to be output).
+        _short_circuit_for_probe_and_additional_data =
+                (_build_blocks->empty() && !_have_other_join_conjunct && 
!_is_mark_join) &&
+                (_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == 
TJoinOp::FULL_OUTER_JOIN ||
+                 _join_op == TJoinOp::LEFT_ANTI_JOIN);
     }
 
     // probe expr
@@ -291,6 +298,7 @@ private:
 
     DataTypes _right_table_data_types;
     DataTypes _left_table_data_types;
+    std::vector<std::string> _right_table_column_names;
 
     RuntimeProfile::Counter* _build_table_timer;
     RuntimeProfile::Counter* _build_expr_call_timer;
@@ -395,6 +403,9 @@ private:
     // add tuple is null flag column to Block for filter conjunct and output 
expr
     void _add_tuple_is_null_column(Block* block) override;
 
+    Status _filter_data_and_build_output(RuntimeState* state, 
vectorized::Block* output_block,
+                                         bool* eos, Block* temp_block);
+
     template <class HashTableContext>
     friend struct ProcessHashTableBuild;
 
diff --git a/be/src/vec/exec/join/vjoin_node_base.h 
b/be/src/vec/exec/join/vjoin_node_base.h
index 8756c24d20..62289044a8 100644
--- a/be/src/vec/exec/join/vjoin_node_base.h
+++ b/be/src/vec/exec/join/vjoin_node_base.h
@@ -97,7 +97,10 @@ protected:
     // Materialize build relation. For HashJoin, it will build a hash table 
while a list of build blocks for NLJoin.
     virtual Status _materialize_build_side(RuntimeState* state) = 0;
 
-    virtual void _init_short_circuit_for_probe() { _short_circuit_for_probe = 
false; }
+    virtual void _init_short_circuit_for_probe() {
+        _short_circuit_for_probe = false;
+        _short_circuit_for_probe_and_additional_data = false;
+    }
 
     TJoinOp::type _join_op;
     JoinOpVariants _join_op_variants;
@@ -124,6 +127,8 @@ protected:
     // 2. build side rows is empty, Join op is: inner join/right outer 
join/left semi/right semi/right anti
     bool _short_circuit_for_probe = false;
 
+    // For some join types, when the build side is empty, we can return directly by adding some additional null data to the probe table.
+    bool _short_circuit_for_probe_and_additional_data = false;
     std::unique_ptr<RowDescriptor> _output_row_desc;
     std::unique_ptr<RowDescriptor> _intermediate_row_desc;
     // output expr
diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp
index a57e5c1705..440bbff153 100644
--- a/be/src/vec/utils/util.hpp
+++ b/be/src/vec/utils/util.hpp
@@ -123,6 +123,16 @@ public:
         return data_types;
     }
 
+    static std::vector<std::string> get_column_names(const RowDescriptor& 
row_desc) {
+        std::vector<std::string> column_names;
+        for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
+            for (const auto& slot_desc : tuple_desc->slots()) {
+                column_names.push_back(slot_desc->col_name());
+            }
+        }
+        return column_names;
+    }
+
     static bool all_arguments_are_constant(const Block& block, const 
ColumnNumbers& args) {
         for (const auto& arg : args) {
             if (!is_column_const(*block.get_by_position(arg).column)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to